平台论坛博客文库

› 论坛 › IT运维 › 监控及自动化运维技术 › Linux内存管理之slab机制（初始化）2.。。。。。。。

Linux内存管理之slab机制（初始化）2.。。。。。。。 [复制链接]

三里屯摇滚

家境小康

论坛徽章:: 0

电梯直达

1楼 [收藏(0)] [报告]

发表于 2012-01-10 10:32 |只看该作者 |倒序浏览

Linux内存管理之slab机制（初始化）2.。。。。。。。

#ifdef CONFIG_ZONE_DMA
sizes->cs_dmacachep = kmem_cache_create(
names->name_dma,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
SLAB_PANIC,
NULL);
#endif
sizes++;
names++;
}
/* 至此，kmalloc general cache已经创建完毕，可以拿来使用了 */
/* 4) Replace the bootstrap head arrays */
/* 第四步，用kmalloc对象替换静态分配的全局变量
。到目前为止一共使用了两个全局local cache
，一个是cache_cache的local cache指向initarray_cache.cache
，另一个是malloc_sizes[INDEX_AC].cs_cachep的local cache指向initarray_generic.cache
，参见setup_cpu_cache函数。这里替换它们。*/
{
struct array_cache *ptr;
/* 申请cache_cache所用local cache的空间 */
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
/* 复制原cache_cache的local cache，即initarray_cache，到新的位置 */
memcpy(ptr, cpu_cache_get(&cache_cache),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
/* cache_cache的local cache指向新的位置 */
cache_cache.array[smp_processor_id()] = ptr;
/* 申请malloc_sizes[INDEX_AC].cs_cachep所用local cache的空间 */
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
!= &initarray_generic.cache);
/* 复制原local cache到新分配的位置，注意此时local cache的大小是固定的 */
memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
ptr;
}
/* 5) Replace the bootstrap kmem_list3's */
/* 第五步，与第四步类似，用kmalloc的空间替换静态分配的slab三链 */
{
int nid;
/* UMA只有一个节点 */
for_each_online_node(nid) {
/* 复制struct kmem_cache的slab三链 */
init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
/* 复制struct arraycache_init的slab三链 */
init_list(malloc_sizes[INDEX_AC].cs_cachep,
&initkmem_list3[SIZE_AC + nid], nid);
/* 复制struct kmem_list3的slab三链 */
if (INDEX_AC != INDEX_L3) {
init_list(malloc_sizes[INDEX_L3].cs_cachep,
&initkmem_list3[SIZE_L3 + nid], nid);
}
}
}
/* 更新slab系统初始化进度 */
g_cpucache_up = EARLY;
}

复制代码

辅助操作

1，slab三链初始化

/*
 * Put a struct kmem_list3 into its initial state: three empty slab lists,
 * NULL shared/alien pointers, zeroed counters and an initialised list_lock.
 *
 * NOTE(review): the text in front of "static" on the next line is residue
 * from the web page's code-viewer widget, not part of the C source.
 */
view plaincopy to clipboardprint?static void kmem_list3_init(struct kmem_list3 *parent)
{
/* The full, partial and free slab lists all start out empty. */
INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial);
INIT_LIST_HEAD(&parent->slabs_free);
/* No shared or alien pointer is attached yet. */
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
/* The lock is initialised explicitly here. */
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
}
/*
 * kmem_list3_init - reset a struct kmem_list3 to its initial state.
 * @parent: the structure to initialise
 *
 * After this call the three slab lists are empty, the shared and alien
 * pointers are NULL, the counters are zero and list_lock is initialised.
 */
static void kmem_list3_init(struct kmem_list3 *parent)
{
	/* Scalar bookkeeping starts from zero. */
	parent->free_touched = 0;
	parent->free_objects = 0;
	parent->colour_next = 0;

	/* Nothing is attached yet. */
	parent->alien = NULL;
	parent->shared = NULL;

	spin_lock_init(&parent->list_lock);

	/* Free, partial and full slab lists all start out empty. */
	INIT_LIST_HEAD(&parent->slabs_free);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_full);
}

复制代码

2，slab三链静态数据初始化

view plaincopy to clipboardprint? /*设置cache的slab三链指向静态分配的全局变量*/
/*
 * set_up_list3s - point the per-node lists of @cachep at the statically
 * allocated global array initkmem_list3[].
 * @cachep: cache whose nodelists[] entries are filled in
 * @index:  position of this cache's first entry in initkmem_list3[];
 *          node N uses initkmem_list3[index + N]
 *
 * Every list also gets its next_reap time set to the current jiffies plus
 * REAPTIMEOUT_LIST3 plus a remainder derived from the cache's address.
 */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
	int nid;

	/* Walk all online nodes (on UMA there is only one). */
	for_each_online_node(nid) {
		/* Base timeout plus an address-derived remainder. */
		unsigned long reap_delay = REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;

		/* initkmem_list3[] holds the lists used during initialisation. */
		cachep->nodelists[nid] = &initkmem_list3[index + nid];
		cachep->nodelists[nid]->next_reap = jiffies + reap_delay;
	}
}
/*
 * set_up_list3s - point the per-node lists of @cachep at the statically
 * allocated global array initkmem_list3[].
 * @cachep: cache whose nodelists[] entries are filled in
 * @index:  position of this cache's first entry in initkmem_list3[];
 *          node N uses initkmem_list3[index + N]
 *
 * Every list also gets its next_reap time set to the current jiffies plus
 * REAPTIMEOUT_LIST3 plus a remainder derived from the cache's address.
 */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
	int nid;

	/* Walk all online nodes (on UMA there is only one). */
	for_each_online_node(nid) {
		/* Base timeout plus an address-derived remainder. */
		unsigned long reap_delay = REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;

		/* initkmem_list3[] holds the lists used during initialisation. */
		cachep->nodelists[nid] = &initkmem_list3[index + nid];
		cachep->nodelists[nid]->next_reap = jiffies + reap_delay;
	}
}

复制代码

3，计算每个slab中对象的数目

view plaincopy to clipboardprint?/*

* Calculate the number of objects and left-over bytes for a given buffer size.
*/
/*
 * cache_estimate - compute how many objects fit into one slab and how many
 * bytes of the slab remain unused.
 * @gfporder:    slab order; one slab spans PAGE_SIZE << gfporder bytes
 * @buffer_size: size of one object
 * @align:       alignment handed to slab_mgmt_size() for the management area
 * @flags:       cache flags; only CFLGS_OFF_SLAB is examined here
 *               (off-slab versus on-slab management)
 * @left_over:   out: bytes used neither by objects nor by management data
 * @num:         out: number of objects per slab
 *
 * With on-slab management the slab memory is used for:
 *
 * - The struct slab
 * - One kmem_bufctl_t for each object
 * - Padding to respect alignment of @align
 * - @buffer_size bytes for each object
 *
 * With off-slab management the alignment is already calculated into the
 * size, and because slabs are page aligned the objects end up correctly
 * aligned. The whole slab then holds objects and the management size is
 * counted as zero.
 *
 * In both cases the object count is capped at SLAB_LIMIT.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
			   size_t align, int flags, size_t *left_over,
			   unsigned int *num)
{
	/* A slab is 1 << gfporder pages. */
	const size_t total = PAGE_SIZE << gfporder;
	size_t overhead = 0;
	int count;

	if (!(flags & CFLGS_OFF_SLAB)) {
		/*
		 * On-slab: first guess ignoring padding. The padding is at
		 * most @align-1 bytes and @buffer_size is at least @align,
		 * so the guess is either right or one too large.
		 */
		count = (total - sizeof(struct slab)) /
			(buffer_size + sizeof(kmem_bufctl_t));

		/* Drop one object if the aligned layout would not fit. */
		if (slab_mgmt_size(count, align) + count * buffer_size > total)
			count--;

		if (count > SLAB_LIMIT)
			count = SLAB_LIMIT;

		/* Aligned size of the management area for the final count. */
		overhead = slab_mgmt_size(count, align);
	} else {
		/* Off-slab: every byte of the slab is available for objects. */
		count = total / buffer_size;

		if (count > SLAB_LIMIT)
			count = SLAB_LIMIT;
	}

	*num = count;
	/* Whatever is neither object nor management data is wasted. */
	*left_over = total - count * buffer_size - overhead;
}
/*
 * cache_estimate - compute how many objects fit into one slab and how many
 * bytes of the slab remain unused.
 * @gfporder:    slab order; one slab spans PAGE_SIZE << gfporder bytes
 * @buffer_size: size of one object
 * @align:       alignment handed to slab_mgmt_size() for the management area
 * @flags:       cache flags; only CFLGS_OFF_SLAB is examined here
 *               (off-slab versus on-slab management)
 * @left_over:   out: bytes used neither by objects nor by management data
 * @num:         out: number of objects per slab
 *
 * With on-slab management the slab memory is used for:
 *
 * - The struct slab
 * - One kmem_bufctl_t for each object
 * - Padding to respect alignment of @align
 * - @buffer_size bytes for each object
 *
 * With off-slab management the alignment is already calculated into the
 * size, and because slabs are page aligned the objects end up correctly
 * aligned. The whole slab then holds objects and the management size is
 * counted as zero.
 *
 * In both cases the object count is capped at SLAB_LIMIT.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
			   size_t align, int flags, size_t *left_over,
			   unsigned int *num)
{
	/* A slab is 1 << gfporder pages. */
	const size_t total = PAGE_SIZE << gfporder;
	size_t overhead = 0;
	int count;

	if (!(flags & CFLGS_OFF_SLAB)) {
		/*
		 * On-slab: first guess ignoring padding. The padding is at
		 * most @align-1 bytes and @buffer_size is at least @align,
		 * so the guess is either right or one too large.
		 */
		count = (total - sizeof(struct slab)) /
			(buffer_size + sizeof(kmem_bufctl_t));

		/* Drop one object if the aligned layout would not fit. */
		if (slab_mgmt_size(count, align) + count * buffer_size > total)
			count--;

		if (count > SLAB_LIMIT)
			count = SLAB_LIMIT;

		/* Aligned size of the management area for the final count. */
		overhead = slab_mgmt_size(count, align);
	} else {
		/* Off-slab: every byte of the slab is available for objects. */
		count = total / buffer_size;

		if (count > SLAB_LIMIT)
			count = SLAB_LIMIT;
	}

	*num = count;
	/* Whatever is neither object nor management data is wasted. */
	*left_over = total - count * buffer_size - overhead;
}

复制代码

辅助数据结构与变量

Linux内核将所有通用cache按不同的对象大小存放在数组中，以方便查找。其中malloc_sizes[]是struct cache_sizes类型的数组，存放各个通用cache的对象大小及对应的cache指针；cache_names[]是struct cache_names类型的数组，存放各个通用cache的名称。两个数组的下标一一对应，也就是说，名称为cache_names[i].name的cache，其对象大小为malloc_sizes[i].cs_size。
view plaincopy to clipboardprint?/* Size description struct for general caches. */

/*
 * One entry of the general-cache size table: an object size together with
 * the cache pointer(s) that belong to that size.
 */
struct cache_sizes {

/* Object size described by this entry. */
size_t       cs_size;

/* Cache associated with cs_size. */
struct kmem_cache *cs_cachep;

#ifdef CONFIG_ZONE_DMA

/* DMA cache for the same size; the field exists only with CONFIG_ZONE_DMA. */
struct kmem_cache *cs_dmacachep;

#endif

};

/*

* These are the default caches for kmalloc. Custom caches can have other sizes.

*

* The table is generated with the CACHE(x) helper macro: each use expands to

* one initializer that sets only .cs_size, leaving the cache pointers zero.

* <linux/kmalloc_sizes.h> is included inside the initializer list while

* CACHE is defined (presumably it consists of CACHE(n) lines; confirm

* against that header). A final CACHE(ULONG_MAX) entry closes the table.

*/

struct cache_sizes malloc_sizes[] = {

/* Expands to one table entry with the given size; note the trailing comma. */
#define CACHE(x) { .cs_size = (x) },

#include <linux/kmalloc_sizes.h>

/* Last entry: size ULONG_MAX. */
CACHE(ULONG_MAX)

/* The helper is local to this table. */
#undef CACHE

};

/*
 * Size description struct for general caches: an object size together with
 * the cache pointer(s) that belong to that size.
 */

struct cache_sizes {

/* Object size described by this entry. */
size_t      cs_size;

/* Cache associated with cs_size. */
struct kmem_cache *cs_cachep;

#ifdef CONFIG_ZONE_DMA

/* DMA cache for the same size; the field exists only with CONFIG_ZONE_DMA. */
struct kmem_cache *cs_dmacachep;

#endif

};

/*

* These are the default caches for kmalloc. Custom caches can have other sizes.

*

* The table is generated with the CACHE(x) helper macro: each use expands to

* one initializer that sets only .cs_size, leaving the cache pointers zero.

* <linux/kmalloc_sizes.h> is included inside the initializer list while

* CACHE is defined (presumably it consists of CACHE(n) lines; confirm

* against that header). A final CACHE(ULONG_MAX) entry closes the table.

*/

struct cache_sizes malloc_sizes[] = {

/* Expands to one table entry with the given size; note the trailing comma. */
#define CACHE(x) { .cs_size = (x) },

#include <linux/kmalloc_sizes.h>

/* Last entry: size ULONG_MAX. */
CACHE(ULONG_MAX)

/* The helper is local to this table. */
#undef CACHE

};view plaincopy to clipboardprint?/* Must match cache_sizes above. Out of line to keep cache footprint low. */

/*
 * Names for one general cache size: the regular name and the name used for
 * the DMA variant. Entries are meant to line up with the cache_sizes table.
 */
struct cache_names {

/* Name of the regular cache. */
char *name;

/* Name of the DMA cache. */
char *name_dma;

};



/*
 * Name table for the general caches, built from the same
 * <linux/kmalloc_sizes.h> list via a local CACHE(x) helper macro.
 * For each size x the entry holds the strings "size-<x>" and
 * "size-<x>(DMA)", produced by stringifying x. The table ends with an
 * entry whose name is NULL.
 */
static struct cache_names __initdata cache_names[] = {

/* #x turns the size into its textual form; adjacent literals are joined. */
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },

#include <linux/kmalloc_sizes.h>

/* Closing entry with a NULL name. */
{NULL,}

#undef CACHE

};

/*
 * Must match cache_sizes above. Out of line to keep cache footprint low.
 *
 * Holds the two names for one general cache size: the regular name and the
 * name used for the DMA variant.
 */

struct cache_names {

/* Name of the regular cache. */
char *name;

/* Name of the DMA cache. */
char *name_dma;

};

/*
 * Name table for the general caches, built from the same
 * <linux/kmalloc_sizes.h> list via a local CACHE(x) helper macro.
 * For each size x the entry holds the strings "size-<x>" and
 * "size-<x>(DMA)", produced by stringifying x. The table ends with an
 * entry whose name is NULL.
 */
static struct cache_names __initdata cache_names[] = {

/* #x turns the size into its textual form; adjacent literals are joined. */
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },

#include <linux/kmalloc_sizes.h>

/* Closing entry with a NULL name. */
{NULL,}

#undef CACHE

};
复制代码
二、内核启动末期初始化

1，根据对象大小计算local cache中对象数目上限；

2，借助数据结构ccupdate_struct操作cpu本地cache。为每个在线cpu分配cpu本地cache；

3，用新分配的cpu本地cache替换原有的cache；

4，更新slab三链以及cpu本地共享cache。

文库|博客

在我心中舞动

稍有积蓄

论坛徽章:: 0

2楼 [报告]

发表于 2012-01-10 10:32 |只看该作者

谢谢分享

实战分享：从技术角度谈机器学习入门| 【大话IT】RadonDB低门槛向MySQL集群下战书 | ChinaUnix打赏功能已上线！ | 新一代分布式关系型数据库RadonDB知多少？

返回列表

Chinaunix › 论坛 › IT运维 › 监控及自动化运维技术 › Linux内存管理之slab机制（初始化）2.。。。。。。。

Linux内存管理之slab机制（初始化）2.。。。。。。。 [复制链接]

浏览过的版块