Linux内存管理之slab机制（初始化）

三里屯摇滚 发表于 2012-01-10 10:26

Linux内存管理之slab机制（初始化）

一、内核启动早期初始化
start_kernel()->mm_init()->kmem_cache_init()

执行流程：

1，初始化静态initkmem_list3三链；

2，初始化cache_cache的nodelists字段为1中的三链；

3，根据内存情况初始化每个slab占用的页面数变量slab_break_gfp_order；

4，将cache_cache加入cache_chain链表中，初始化cache_cache；

5，创建kmalloc所用的general cache：

1）cache的名称和大小存放在两个数据结构对应的数组中，对应大小的cache可以从size数组中找到；

2）先创建INDEX_AC和INDEX_L3下标的cache；

3）循环创建size数组中各个大小的cache；

6，替换静态本地cache全局变量：

1) 替换cache_cache中的array_cache，本来指向静态变量initarray_cache.cache；

2) 替换malloc_sizes[INDEX_AC].cs_cachep的local cache，原本指向静态变量initarray_generic.cache；

7，替换静态三链

1）替换cache_cache三链，原本指向静态变量initkmem_list3；

2）替换malloc_sizes[INDEX_AC].cs_cachep的三链（INDEX_AC与INDEX_L3不同时，还包括malloc_sizes[INDEX_L3].cs_cachep的三链），原本指向静态变量initkmem_list3；

8，更新初始化进度

view plaincopy to clipboardprint?/*
* Initialisation.Called after the page allocator have been initialised and
* before smp_init().
*/
void __init kmem_cache_init(void)
{
size_t left_over;
struct cache_sizes *sizes;
struct cache_names *names;
int i;
int order;
int node;
/* 在slab初始化好之前，无法通过kmalloc分配初始化过程中必要的一些对象
，只能使用静态的全局变量
，待slab初始化后期，再使用kmalloc动态分配的对象替换全局变量 */

/* 如前所述，先借用全局变量initkmem_list3表示的slab三链
，每个内存节点对应一组slab三链。initkmem_list3是个slab三链数组，对于每个内存节点，包含三组
：struct kmem_cache的slab三链、struct arraycache_init的slab 三链、struct kmem_list3的slab三链
。这里循环初始化所有内存节点的所有slab三链 */
if (num_possible_nodes() == 1)
   use_alien_caches = 0;
/*初始化所有node的所有slab中的三个链表*/
for (i = 0; i < NUM_INIT_LISTS; i++) {
   kmem_list3_init(&initkmem_list3);
   /* 全局变量cache_cache指向的slab cache包含所有struct kmem_cache对象，不包含cache_cache本身
   。这里初始化所有内存节点的struct kmem_cache的slab三链为空。*/
   if (i < MAX_NUMNODES)
         cache_cache.nodelists = NULL;
}
/* 设置struct kmem_cache的slab三链指向initkmem_list3中的一组slab三链，
CACHE_CACHE为cache在内核cache链表中的索引，
struct kmem_cache对应的cache是内核中创建的第一个cache
，故CACHE_CACHE为0 */
set_up_list3s(&cache_cache, CACHE_CACHE);

/*
* Fragmentation resistance on low memory - only use bigger
* page orders on machines with more than 32MB of memory.
*/
/* 全局变量slab_break_gfp_order为每个slab最多占用几个页面
，用来抑制碎片，比如大小为3360的对象
，如果其slab只占一个页面，碎片为736
，slab占用两个页面，则碎片大小也翻倍
。只有当对象很大
，以至于slab中连一个对象都放不下时
，才可以超过这个值
。有两个可能的取值
：当可用内存大于32MB时
，BREAK_GFP_ORDER_HI为1
，即每个slab最多占用2个页面
，只有当对象大小大于8192时
，才可以突破slab_break_gfp_order的限制
。小于等于32MB时BREAK_GFP_ORDER_LO为0。*/
if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
   slab_break_gfp_order = BREAK_GFP_ORDER_HI;

/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
* 1) initialize the cache_cache cache: it contains the struct
* kmem_cache structures of all caches, except cache_cache itself:
* cache_cache is statically allocated.
* Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
* 2) Create the first kmalloc cache.
* The struct kmem_cache for the new cache is allocated normally.
* An __init data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized
* head arrays.
* 4) Replace the __init data head arrays for cache_cache and the first
* kmalloc cache with kmalloc allocated arrays.
* 5) Replace the __init data for kmem_list3 for cache_cache and
* the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/

node = numa_node_id();

/* 1) create the cache_cache */
/* 第一步，创建struct kmem_cache所在的cache，由全局变量cache_cache指向
，这里只是初始化数据结构
，并未真正创建这些对象，要待分配时才创建。*/
/* 全局变量cache_chain是内核slab cache链表的表头 */
INIT_LIST_HEAD(&cache_chain);

/* 将cache_cache加入到slab cache链表 */
list_add(&cache_cache.next, &cache_chain);

/* 设置cache着色基本单位为cache line的大小：32字节 */
cache_cache.colour_off = cache_line_size();
/*初始化cache_cache的local cache，同样这里也不能使用kmalloc
，需要使用静态分配的全局变量initarray_cache */
cache_cache.array = &initarray_cache.cache;
/* 初始化slab链表 ,用全局变量*/
cache_cache.nodelists = &initkmem_list3;

/*
* struct kmem_cache size depends on nr_node_ids, which
* can be less than MAX_NUMNODES.
*/
/* buffer_size保存slab中对象的大小，这里是计算struct kmem_cache的大小
， nodelists是最后一个成员
，nr_node_ids保存内存节点个数，UMA为1
，所以nodelists偏移加上1个struct kmem_list3 的大小即为struct kmem_cache的大小 */
cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
            nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
cache_cache.obj_size = cache_cache.buffer_size;
#endif
/* 将对象大小与cache line大小对齐 */
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
               cache_line_size());
/* 计算对象大小的倒数，用于计算对象在slab中的索引 */
cache_cache.reciprocal_buffer_size =
   reciprocal_value(cache_cache.buffer_size);

for (order = 0; order < MAX_ORDER; order++) {
   /* 计算cache_cache中的对象数目 */
   cache_estimate(order, cache_cache.buffer_size,
         cache_line_size(), 0, &left_over, &cache_cache.num);
   /* num不为0意味着创建struct kmem_cache对象成功，退出 */
   if (cache_cache.num)
         break;
}
BUG_ON(!cache_cache.num);
/* gfporder表示本slab包含2^gfporder个页面 */
cache_cache.gfporder = order;
   /* 着色区的大小，以colour_off为单位 */
cache_cache.colour = left_over / cache_cache.colour_off;
/* slab管理对象的大小 */
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                  sizeof(struct slab), cache_line_size());

/* 2+3) create the kmalloc caches */
/* 第二步，创建kmalloc所用的general cache
，kmalloc所用的对象按大小分级
，malloc_sizes保存大小，cache_names保存cache名 */
sizes = malloc_sizes;
names = cache_names;

/*
* Initialize the caches that provide memory for the array cache and the
* kmem_list3 structures first.Without this, further allocations will
* bug.
*/
/* 首先创建struct array_cache和struct kmem_list3所用的general cache
，它们是后续初始化动作的基础 */
/* INDEX_AC是计算local cache所用的struct arraycache_init对象在kmalloc size中的索引
，即属于哪一级别大小的general cache
，创建此大小级别的cache为local cache所用 */
sizes.cs_cachep = kmem_cache_create(names.name,
               sizes.cs_size,
               ARCH_KMALLOC_MINALIGN,
               ARCH_KMALLOC_FLAGS|SLAB_PANIC,
               NULL);
/* 如果struct kmem_list3和struct arraycache_init对应的kmalloc size索引不同
，即大小属于不同的级别
，则创建struct kmem_list3所用的cache，否则共用一个cache */
if (INDEX_AC != INDEX_L3) {
   sizes.cs_cachep =
         kmem_cache_create(names.name,
            sizes.cs_size,
            ARCH_KMALLOC_MINALIGN,
            ARCH_KMALLOC_FLAGS|SLAB_PANIC,
            NULL);
}
/* 创建完上述两个general cache后，slab early init阶段结束，在此之前
，不允许创建外置式slab */
slab_early_init = 0;

/* 循环创建kmalloc各级别的general cache */
while (sizes->cs_size != ULONG_MAX) {
   /*
      * For performance, all the general caches are L1 aligned.
      * This should be particularly beneficial on SMP boxes, as it
      * eliminates "false sharing".
      * Note for systems short on memory removing the alignment will
      * allow tighter packing of the smaller caches.
      */
      /* 某级别的kmalloc cache还未创建，创建之，struct kmem_list3和
      struct arraycache_init对应的cache已经创建过了 */
   if (!sizes->cs_cachep) {
         sizes->cs_cachep = kmem_cache_create(names->name,
               sizes->cs_size,
               ARCH_KMALLOC_MINALIGN,
               ARCH_KMALLOC_FLAGS|SLAB_PANIC,
               NULL);
   }
#ifdef CONFIG_ZONE_DMA
   sizes->cs_dmacachep = kmem_cache_create(
               names->name_dma,
               sizes->cs_size,
               ARCH_KMALLOC_MINALIGN,
               ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
                     SLAB_PANIC,
               NULL);
#endif
   sizes++;
   names++;
}
/* 至此，kmalloc general cache已经创建完毕，可以拿来使用了 */
/* 4) Replace the bootstrap head arrays */
/* 第四步，用kmalloc对象替换静态分配的全局变量
。到目前为止一共使用了两个全局local cache
，一个是cache_cache的local cache指向initarray_cache.cache
，另一个是malloc_sizes.cs_cachep的local cache指向initarray_generic.cache
，参见setup_cpu_cache函数。这里替换它们。*/
{
   struct array_cache *ptr;
   /* 申请cache_cache所用local cache的空间 */
   ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

   BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
   /* 复制原cache_cache的local cache，即initarray_cache，到新的位置 */
   memcpy(ptr, cpu_cache_get(&cache_cache),
            sizeof(struct arraycache_init));
   /*
      * Do not assume that spinlocks can be initialized via memcpy:
      */
   spin_lock_init(&ptr->lock);
   /* cache_cache的local cache指向新的位置 */
   cache_cache.array = ptr;
   /* 申请malloc_sizes.cs_cachep所用local cache的空间 */
   ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

   BUG_ON(cpu_cache_get(malloc_sizes.cs_cachep)
            != &initarray_generic.cache);
   /* 复制原local cache到新分配的位置，注意此时local cache的大小是固定的 */
   memcpy(ptr, cpu_cache_get(malloc_sizes.cs_cachep),
            sizeof(struct arraycache_init));
   /*
      * Do not assume that spinlocks can be initialized via memcpy:
      */
   spin_lock_init(&ptr->lock);

   malloc_sizes.cs_cachep->array =
         ptr;
}
/* 5) Replace the bootstrap kmem_list3's */
/* 第五步，与第四步类似，用kmalloc的空间替换静态分配的slab三链 */
{
   int nid;
   /* UMA只有一个节点 */
   for_each_online_node(nid) {
         /* 复制struct kmem_cache的slab三链 */
         init_list(&cache_cache, &initkmem_list3, nid);
         /* 复制struct arraycache_init的slab三链 */
         init_list(malloc_sizes.cs_cachep,
               &initkmem_list3, nid);
         /* 复制struct kmem_list3的slab三链 */
         if (INDEX_AC != INDEX_L3) {
            init_list(malloc_sizes.cs_cachep,
                  &initkmem_list3, nid);
         }
   }
}
/* 更新slab系统初始化进度 */
g_cpucache_up = EARLY;
}
/*
* Initialisation.Called after the page allocator have been initialised and
* before smp_init().
*/
void __init kmem_cache_init(void)
{
size_t left_over;
struct cache_sizes *sizes;
struct cache_names *names;
int i;
int order;
int node;
/* 在slab初始化好之前，无法通过kmalloc分配初始化过程中必要的一些对象
，只能使用静态的全局变量
，待slab初始化后期，再使用kmalloc动态分配的对象替换全局变量 */

/* 如前所述，先借用全局变量initkmem_list3表示的slab三链
，每个内存节点对应一组slab三链。initkmem_list3是个slab三链数组，对于每个内存节点，包含三组
：struct kmem_cache的slab三链、struct arraycache_init的slab 三链、struct kmem_list3的slab三链
。这里循环初始化所有内存节点的所有slab三链 */
if (num_possible_nodes() == 1)
use_alien_caches = 0;
/*初始化所有node的所有slab中的三个链表*/
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3);
/* 全局变量cache_cache指向的slab cache包含所有struct kmem_cache对象，不包含cache_cache本身
。这里初始化所有内存节点的struct kmem_cache的slab三链为空。*/
if (i < MAX_NUMNODES)
cache_cache.nodelists = NULL;
}
/* 设置struct kmem_cache的slab三链指向initkmem_list3中的一组slab三链，
CACHE_CACHE为cache在内核cache链表中的索引，
struct kmem_cache对应的cache是内核中创建的第一个cache
，故CACHE_CACHE为0 */
set_up_list3s(&cache_cache, CACHE_CACHE);

/*
* Fragmentation resistance on low memory - only use bigger
* page orders on machines with more than 32MB of memory.
*/
/* 全局变量slab_break_gfp_order为每个slab最多占用几个页面
，用来抑制碎片，比如大小为3360的对象
，如果其slab只占一个页面，碎片为736
，slab占用两个页面，则碎片大小也翻倍
。只有当对象很大
，以至于slab中连一个对象都放不下时
，才可以超过这个值
。有两个可能的取值
：当可用内存大于32MB时
，BREAK_GFP_ORDER_HI为1
，即每个slab最多占用2个页面
，只有当对象大小大于8192时
，才可以突破slab_break_gfp_order的限制
。小于等于32MB时BREAK_GFP_ORDER_LO为0。*/
if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = BREAK_GFP_ORDER_HI;

/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
* 1) initialize the cache_cache cache: it contains the struct
* kmem_cache structures of all caches, except cache_cache itself:
* cache_cache is statically allocated.
* Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
* 2) Create the first kmalloc cache.
* The struct kmem_cache for the new cache is allocated normally.
* An __init data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized
* head arrays.
* 4) Replace the __init data head arrays for cache_cache and the first
* kmalloc cache with kmalloc allocated arrays.
* 5) Replace the __init data for kmem_list3 for cache_cache and
* the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/

node = numa_node_id();

/* 1) create the cache_cache */
/* 第一步，创建struct kmem_cache所在的cache，由全局变量cache_cache指向
，这里只是初始化数据结构
，并未真正创建这些对象，要待分配时才创建。*/
/* 全局变量cache_chain是内核slab cache链表的表头 */
INIT_LIST_HEAD(&cache_chain);

/* 将cache_cache加入到slab cache链表 */
list_add(&cache_cache.next, &cache_chain);

/* 设置cache着色基本单位为cache line的大小：32字节 */
cache_cache.colour_off = cache_line_size();
/*初始化cache_cache的local cache，同样这里也不能使用kmalloc
，需要使用静态分配的全局变量initarray_cache */
cache_cache.array = &initarray_cache.cache;
/* 初始化slab链表 ,用全局变量*/
cache_cache.nodelists = &initkmem_list3;

/*
* struct kmem_cache size depends on nr_node_ids, which
* can be less than MAX_NUMNODES.
*/
/* buffer_size保存slab中对象的大小，这里是计算struct kmem_cache的大小
， nodelists是最后一个成员
，nr_node_ids保存内存节点个数，UMA为1
，所以nodelists偏移加上1个struct kmem_list3 的大小即为struct kmem_cache的大小 */
cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
cache_cache.obj_size = cache_cache.buffer_size;
#endif
/* 将对象大小与cache line大小对齐 */
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
/* 计算对象大小的倒数，用于计算对象在slab中的索引 */
cache_cache.reciprocal_buffer_size =
reciprocal_value(cache_cache.buffer_size);

for (order = 0; order < MAX_ORDER; order++) {
/* 计算cache_cache中的对象数目 */
cache_estimate(order, cache_cache.buffer_size,
cache_line_size(), 0, &left_over, &cache_cache.num);
/* num不为0意味着创建struct kmem_cache对象成功，退出 */
if (cache_cache.num)
break;
}
BUG_ON(!cache_cache.num);
/* gfporder表示本slab包含2^gfporder个页面 */
cache_cache.gfporder = order;
   /* 着色区的大小，以colour_off为单位 */
cache_cache.colour = left_over / cache_cache.colour_off;
/* slab管理对象的大小 */
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
      sizeof(struct slab), cache_line_size());

/* 2+3) create the kmalloc caches */
/* 第二步，创建kmalloc所用的general cache
，kmalloc所用的对象按大小分级
，malloc_sizes保存大小，cache_names保存cache名 */
sizes = malloc_sizes;
names = cache_names;

/*
* Initialize the caches that provide memory for the array cache and the
* kmem_list3 structures first.Without this, further allocations will
* bug.
*/
/* 首先创建struct array_cache和struct kmem_list3所用的general cache
，它们是后续初始化动作的基础 */
/* INDEX_AC是计算local cache所用的struct arraycache_init对象在kmalloc size中的索引
，即属于哪一级别大小的general cache
，创建此大小级别的cache为local cache所用 */
sizes.cs_cachep = kmem_cache_create(names.name,
sizes.cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
/* 如果struct kmem_list3和struct arraycache_init对应的kmalloc size索引不同
，即大小属于不同的级别
，则创建struct kmem_list3所用的cache，否则共用一个cache */
if (INDEX_AC != INDEX_L3) {
sizes.cs_cachep =
kmem_cache_create(names.name,
sizes.cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}
/* 创建完上述两个general cache后，slab early init阶段结束，在此之前
，不允许创建外置式slab */
slab_early_init = 0;

/* 循环创建kmalloc各级别的general cache */
while (sizes->cs_size != ULONG_MAX) {
/*
* For performance, all the general caches are L1 aligned.
* This should be particularly beneficial on SMP boxes, as it
* eliminates "false sharing".
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches.
*/
/* 某级别的kmalloc cache还未创建，创建之，struct kmem_list3和
struct arraycache_init对应的cache已经创建过了 */
if (!sizes->cs_cachep) {
sizes->cs_cachep = kmem_cache_create(names->name,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}

在我心中舞动 发表于 2012-01-10 10:29

谢谢分享

页: [1]

Chinaunix's Archiver

Linux内存管理之slab机制（初始化）