- 论坛徽章:
- 0
|
Linux内存管理之slab机制(创建slab)
Linux内核中创建slab主要由函数cache_grow()实现,从slab的创建中我们可以完整地看到slab与对象、页面的组织方式。
view plaincopy to clipboardprint?- 01./*
- 02. * Grow (by 1) the number of slabs within a cache. This is called by
- 03. * kmem_cache_alloc() when there are no active objs left in a cache.
- 04. */
- 05. /* Create one empty slab out of one or more pages and put it on the node's slabs_free list.
- 06.objp: virtual address of the pages; NULL means no pages have been allocated yet, non-NULL
- 07.means pages are already allocated and are used directly. Returns 1 on success, 0 on failure. */
- 08.static int cache_grow(struct kmem_cache *cachep,
- 09. gfp_t flags, int nodeid, void *objp)
- 10.{
- 11. struct slab *slabp;
- 12. size_t offset;
- 13. gfp_t local_flags;
- 14. struct kmem_list3 *l3;
- 15.
- 16. /*
- 17. * Be lazy and only check for valid flags here, keeping it out of the
- 18. * critical path in kmem_cache_alloc().
- 19. */
- 20. BUG_ON(flags & GFP_SLAB_BUG_MASK);
- 21. local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
- 22.
- 23. /* Take the l3 list lock to change the colour_next on this node */
- 24. check_irq_off();
- 25. /* Get the three slab lists (kmem_list3) of this memory node */
- 26. l3 = cachep->nodelists[nodeid];
- 27. spin_lock(&l3->list_lock);
- 28.
- 29. /* Get colour for the slab, and cal the next value. */
- 30. /* Get the colour offset (counted in colour units) for this slab */
- 31. offset = l3->colour_next;
- 32. /* Advance the colour so that successive slabs get different colour offsets */
- 33. l3->colour_next++;
- 34. /* It must not reach the total number of colours; if it does, reset it to 0. This is the colour
- 35. wrap-around discussed earlier: when a slab wastes little space, the colours cycle quickly. */
- 36. if (l3->colour_next >= cachep->colour)
- 37. l3->colour_next = 0;
- 38. spin_unlock(&l3->list_lock);
- 39. /* Convert the number of colour units into the size of the colour area */
- 40. offset *= cachep->colour_off;
- 41.
- 42. if (local_flags & __GFP_WAIT)
- 43. local_irq_enable();
- 44.
- 45. /*
- 46. * The test for missing atomic flag is performed here, rather than
- 47. * the more obvious place, simply to reduce the critical path length
- 48. * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
- 49. * will eventually be caught here (where it matters).
- 50. */
- 51. kmem_flagcheck(cachep, flags);
- 52.
- 53. /*
- 54. * Get mem for the objs. Attempt to allocate a physical page from
- 55. * 'nodeid'.
- 56. */
- 57. if (!objp)/* No pages yet: allocate 1<<cachep->gfporder pages from this memory node;
- 58. objp becomes the virtual address of the slab's first page */
- 59. objp = kmem_getpages(cachep, local_flags, nodeid);
- 60. if (!objp)
- 61. goto failed;
- 62.
- 63. /* Get slab management. */
- 64. /* Allocate the slab management object */
- 65. slabp = alloc_slabmgmt(cachep, objp, offset,
- 66. local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
- 67. if (!slabp)
- 68. goto opps1;
- 69. /* Set up the page -> cache and page -> slab mappings */
- 70. slab_map_pages(cachep, slabp, objp);
- 71.
- 72. /* Initialise the objects in the slab */
- 73. cache_init_objs(cachep, slabp);
- 74.
- 75. if (local_flags & __GFP_WAIT)
- 76. local_irq_disable();
- 77. check_irq_off();
- 78. spin_lock(&l3->list_lock);
- 79.
- 80. /* Make slab active. */
- 81. list_add_tail(&slabp->list, &(l3->slabs_free));
- 82. /* Bump this cache's grow counter */
- 83. STATS_INC_GROWN(cachep);
- 84. /* Update the free-object count of the node's slab lists */
- 85. l3->free_objects += cachep->num;
- 86. spin_unlock(&l3->list_lock);
- 87. return 1;
- 88.opps1:
- 89. kmem_freepages(cachep, objp);
- 90.failed:
- 91. if (local_flags & __GFP_WAIT)
- 92. local_irq_disable();
- 93. return 0;
- 94.}
- /*
- * Grow (by 1) the number of slabs within a cache. This is called by
- * kmem_cache_alloc() when there are no active objs left in a cache.
- */
- /* Create one empty slab out of one or more pages and put it on the node's slabs_free list.
- cachep: the cache to grow.
- flags: gfp flags from the caller; only the GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK bits are kept in local_flags.
- nodeid: index into cachep->nodelists selecting the memory node.
- objp: virtual address of the pages; NULL means no pages have been allocated yet,
- non-NULL means pages are already allocated and are used directly for the slab.
- Returns 1 on success, 0 if the page allocation or the slab-management allocation fails.
- Note: on the opps1 path the pages at objp are freed even when the caller supplied them. */
- static int cache_grow(struct kmem_cache *cachep,
- gfp_t flags, int nodeid, void *objp)
- {
- struct slab *slabp;
- size_t offset;
- gfp_t local_flags;
- struct kmem_list3 *l3;
- /*
- * Be lazy and only check for valid flags here, keeping it out of the
- * critical path in kmem_cache_alloc().
- */
- BUG_ON(flags & GFP_SLAB_BUG_MASK);
- local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
- /* Take the l3 list lock to change the colour_next on this node */
- check_irq_off();
- /* Get the three slab lists (kmem_list3) of this memory node */
- l3 = cachep->nodelists[nodeid];
- spin_lock(&l3->list_lock);
- /* Get colour for the slab, and cal the next value. */
- /* Get the colour offset (counted in colour units) for this slab */
- offset = l3->colour_next;
- /* Advance the colour so that successive slabs get different colour offsets */
- l3->colour_next++;
- /* It must not reach the total number of colours; if it does, reset it to 0. This is the colour
- wrap-around discussed earlier: when a slab wastes little space, the colours cycle quickly. */
- if (l3->colour_next >= cachep->colour)
- l3->colour_next = 0;
- spin_unlock(&l3->list_lock);
- /* Convert the number of colour units into the size of the colour area */
- offset *= cachep->colour_off;
- if (local_flags & __GFP_WAIT)
- local_irq_enable(); /* interrupts stay enabled during the allocations below when __GFP_WAIT is set */
- /*
- * The test for missing atomic flag is performed here, rather than
- * the more obvious place, simply to reduce the critical path length
- * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
- * will eventually be caught here (where it matters).
- */
- kmem_flagcheck(cachep, flags);
- /*
- * Get mem for the objs. Attempt to allocate a physical page from
- * 'nodeid'.
- */
- if (!objp)/* No pages yet: allocate 1<<cachep->gfporder pages from this memory node;
- objp becomes the virtual address of the slab's first page */
- objp = kmem_getpages(cachep, local_flags, nodeid);
- if (!objp)
- goto failed;
- /* Get slab management. */
- /* Allocate the slab management object */
- slabp = alloc_slabmgmt(cachep, objp, offset,
- local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
- if (!slabp)
- goto opps1; /* release the pages before returning failure */
- /* Set up the page -> cache and page -> slab mappings */
- slab_map_pages(cachep, slabp, objp);
- /* Initialise the objects in the slab */
- cache_init_objs(cachep, slabp);
- if (local_flags & __GFP_WAIT)
- local_irq_disable(); /* undo the local_irq_enable() above before taking the list lock */
- check_irq_off();
- spin_lock(&l3->list_lock);
- /* Make slab active. */
- list_add_tail(&slabp->list, &(l3->slabs_free));
- /* Bump this cache's grow counter */
- STATS_INC_GROWN(cachep);
- /* Update the free-object count of the node's slab lists */
- l3->free_objects += cachep->num;
- spin_unlock(&l3->list_lock);
- return 1;
- opps1:
- kmem_freepages(cachep, objp);
- failed:
- if (local_flags & __GFP_WAIT)
- local_irq_disable(); /* interrupts are disabled again on the failure path too */
- return 0;
- }
复制代码 执行流程:
1,从cache结构中获得并计算着色区偏移量;
2,从伙伴系统中获得1<<cachep->gfporder个页面用于slab;
3,初始化slab中相关变量,如果是外置式slab,需要另外申请slab管理区的空间,由函数alloc_slabmgmt()实现。
view plaincopy to clipboardprint?- 01./* Allocate and initialise the slab management object (struct slab); returns NULL when the off-slab allocation fails */
- 02.static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- 03. int colour_off, gfp_t local_flags,
- 04. int nodeid)
- 05.{
- 06. struct slab *slabp;
- 07.
- 08. if (OFF_SLAB(cachep)) {
- 09. /* Slab management obj is off-slab. */
- 10. /* Off-slab management. Allocate a management object from a general slab cache;
- 11. slabp_cache points to the general slab cache that holds struct slab objects.
- 12. During slab initialisation the general slab caches may not exist yet and slabp_cache is NULL,
- 13. so every slab created during initialisation uses on-slab management. */
- 14. slabp = kmem_cache_alloc_node(cachep->slabp_cache,
- 15. local_flags, nodeid);
- 16. /*
- 17. * If the first object in the slab is leaked (it's allocated
- 18. * but no one has a reference to it), we want to make sure
- 19. * kmemleak does not treat the ->s_mem pointer as a reference
- 20. * to the object. Otherwise we will not report the leak.
- 21. *//* Describe the list member of slabp to kmemleak; note this call runs before the NULL check below */
- 22. kmemleak_scan_area(slabp, offsetof(struct slab, list),
- 23. sizeof(struct list_head), local_flags);
- 24. if (!slabp)
- 25. return NULL;
- 26. } else {/* On-slab management. objp is the virtual address of the slab's first page; adding the
- 27. colour offset gives the virtual address of the slab management object */
- 28. slabp = objp + colour_off;
- 29. /* Compute the offset of the first object in the slab; slab_size holds the size of the
- 30. management area, i.e. the struct slab plus the kmem_bufctl_t array */
- 31. colour_off += cachep->slab_size;
- 32. } /* No objects are in use (allocated) yet */
- 33. slabp->inuse = 0;
- 34. /* Offset of the first object. For on-slab management colouroff covers the colour area
- 35. plus the space taken by the management object;
- 36. for off-slab management colouroff covers only the colour area. */
- 37. slabp->colouroff = colour_off;
- 38. /* Virtual address of the first object */
- 39. slabp->s_mem = objp + colour_off;
- 40. /* Memory node id */
- 41. slabp->nodeid = nodeid;
- 42. /* The first free object is index 0, i.e. the first element of the kmem_bufctl_t array */
- 43. slabp->free = 0;
- 44. return slabp;
- 45.}
- /* Allocate and initialise the slab management object (struct slab).
- cachep: cache the slab belongs to.
- objp: virtual address of the slab's first page.
- colour_off: colour offset of this slab; for on-slab management cachep->slab_size is added to it.
- local_flags: gfp flags passed to kmem_cache_alloc_node() for an off-slab management object.
- nodeid: memory node id, stored in slabp->nodeid.
- Returns the management object, or NULL when the off-slab allocation fails. */
- static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags,
- int nodeid)
- {
- struct slab *slabp;
- if (OFF_SLAB(cachep)) {
- /* Slab management obj is off-slab. */
- /* Off-slab management. Allocate a management object from a general slab cache;
- slabp_cache points to the general slab cache that holds struct slab objects.
- During slab initialisation the general slab caches may not exist yet and slabp_cache is NULL,
- so every slab created during initialisation uses on-slab management. */
- slabp = kmem_cache_alloc_node(cachep->slabp_cache,
- local_flags, nodeid);
- /*
- * If the first object in the slab is leaked (it's allocated
- * but no one has a reference to it), we want to make sure
- * kmemleak does not treat the ->s_mem pointer as a reference
- * to the object. Otherwise we will not report the leak.
- *//* Describe the list member of slabp to kmemleak; note this call runs before the NULL check below */
- kmemleak_scan_area(slabp, offsetof(struct slab, list),
- sizeof(struct list_head), local_flags);
- if (!slabp)
- return NULL;
- } else {/* On-slab management. objp is the virtual address of the slab's first page; adding the
- colour offset gives the virtual address of the slab management object */
- slabp = objp + colour_off;
- /* Compute the offset of the first object in the slab; slab_size holds the size of the
- management area, i.e. the struct slab plus the kmem_bufctl_t array */
- colour_off += cachep->slab_size;
- } /* No objects are in use (allocated) yet */
- slabp->inuse = 0;
- /* Offset of the first object. For on-slab management colouroff covers the colour area
- plus the space taken by the management object;
- for off-slab management colouroff covers only the colour area. */
- slabp->colouroff = colour_off;
- /* Virtual address of the first object */
- slabp->s_mem = objp + colour_off;
- /* Memory node id */
- slabp->nodeid = nodeid;
- /* The first free object is index 0, i.e. the first element of the kmem_bufctl_t array */
- slabp->free = 0;
- return slabp;
- }
复制代码 通过初始化,我们画出下面图像。
4,设置slab中页面(1<<cachep->gfporder个)到slab、cache的映射。这样,可以通过page的lru链表找到page所属的slab和cache。由函数slab_map_pages()实现。
view plaincopy to clipboardprint?- 01./* Point every page of the slab at its cache and slab, so the owning cache and slab can be found from a page.
- 02. addr: virtual address of the slab's first page */
- 03.static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
- 04. void *addr)
- 05.{
- 06. int nr_pages;
- 07. struct page *page;
- 08. /* Get the slab's first page */
- 09. page = virt_to_page(addr);
- 10.
- 11. nr_pages = 1;
- 12. /* If it is not a compound page (see the relevant documentation on compound pages),
- 13. compute the number of pages as 1 << gfporder; otherwise only one page is mapped */
- 14. if (likely(!PageCompound(page)))
- 15. nr_pages <<= cache->gfporder;
- 16.
- 17. do {
- 18. /* Per the article: the lru member of struct page means different things depending on the page's use.
- 19. When the page is free or used for a cache,
- 20. lru links pages into a doubly linked list; when the page is used for a slab,
- 21. next points to the page's cache and prev points to the page's slab */
- 22. page_set_cache(page, cache);
- 23. page_set_slab(page, slab);
- 24. page++;
- 25. } while (--nr_pages);
- 26.}
- /* Point every page of the slab at its cache and slab, so the owning cache and slab
- can be found from a page.
- cache: cache the pages belong to.
- slab: slab management object the pages belong to.
- addr: virtual address of the slab's first page. */
- static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
- void *addr)
- {
- int nr_pages;
- struct page *page;
- /* Get the slab's first page */
- page = virt_to_page(addr);
- nr_pages = 1;
- /* If it is not a compound page (see the relevant documentation on compound pages),
- compute the number of pages as 1 << gfporder; otherwise only one page is mapped */
- if (likely(!PageCompound(page)))
- nr_pages <<= cache->gfporder;
- do {
- /* Per the article: the lru member of struct page means different things depending on the page's use.
- When the page is free or used for a cache,
- lru links pages into a doubly linked list; when the page is used for a slab,
- next points to the page's cache and prev points to the page's slab */
- page_set_cache(page, cache);
- page_set_slab(page, slab);
- page++;
- } while (--nr_pages);
- }
复制代码 代码实现结果如下图
5,初始化slab中kmem_bufctl_t[]数组,其中kmem_bufctl_t[]数组为一个静态链表,指定了slab对象(obj)的访问顺序。即kmem_bufctl_t[]中存放的是下一个访问的obj的索引。在后面的分析中,slab_get_obj()函数从slab中提取一个空闲对象:它通过index_to_obj()函数由下标slabp->free得到空闲对象的地址,然后通过slab_bufctl(slabp)[slabp->free]获得下一个空闲对象的索引,并用它更新静态链表。
view plaincopy to clipboardprint?- 01./* Initialise the cachep->num objects of a slab: run the constructor (if any) on each and chain them together through the kmem_bufctl_t array */
- 02.static void cache_init_objs(struct kmem_cache *cachep,
- 03. struct slab *slabp)
- 04.{
- 05. int i;
- 06. /* Initialise the objects in the slab one by one */
- 07. for (i = 0; i < cachep->num; i++) {
- 08. /* Get the i-th object in the slab */
- 09. void *objp = index_to_obj(cachep, slabp, i);
- 10.#if DEBUG
- 11. /* need to poison the objs? */
- 12. if (cachep->flags & SLAB_POISON)
- 13. poison_obj(cachep, objp, POISON_FREE);
- 14. if (cachep->flags & SLAB_STORE_USER)
- 15. *dbg_userword(cachep, objp) = NULL;
- 16.
- 17. if (cachep->flags & SLAB_RED_ZONE) {
- 18. *dbg_redzone1(cachep, objp) = RED_INACTIVE;
- 19. *dbg_redzone2(cachep, objp) = RED_INACTIVE;
- 20. }
- 21. /*
- 22. * Constructors are not allowed to allocate memory from the same
- 23. * cache which they are a constructor for. Otherwise, deadlock.
- 24. * They must also be threaded.
- 25. */
- 26. if (cachep->ctor && !(cachep->flags & SLAB_POISON))
- 27. cachep->ctor(objp + obj_offset(cachep));
- 28.
- 29. if (cachep->flags & SLAB_RED_ZONE) {
- 30. if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
- 31. slab_error(cachep, "constructor overwrote the"
- 32. " end of an object");
- 33. if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
- 34. slab_error(cachep, "constructor overwrote the"
- 35. " start of an object");
- 36. }
- 37. if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
- 38. OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
- 39. kernel_map_pages(virt_to_page(objp),
- 40. cachep->buffer_size / PAGE_SIZE, 0);
- 41.#else
- 42. /* Call the constructor for this object */
- 43. if (cachep->ctor)
- 44. cachep->ctor(objp);
- 45.#endif /* Initially every object is free, so they only need to be chained in array order */
- 46. /* Acts as a static "next index" link: entry i refers to object i + 1 */
- 47. slab_bufctl(slabp)[i] = i + 1;
- 48. }
- 49. /* The last entry is BUFCTL_END. NOTE(review): assumes cachep->num >= 1, otherwise i - 1 indexes before the array - confirm */
- 50. slab_bufctl(slabp)[i - 1] = BUFCTL_END;
- 51.}
复制代码 |
|