平台论坛博客文库

› 论坛 › IT运维 › 监控及自动化运维技术 › linux高端内存管理之非连续内存区（分配和释放）

linux高端内存管理之非连续内存区（分配和释放） [复制链接]

三里屯摇滚

家境小康

论坛徽章:: 0

电梯直达

1楼 [收藏(0)] [报告]

发表于 2012-01-06 16:29 |只看该作者 |倒序浏览

linux高端内存管理之非连续内存区（分配和释放）

前面总结了非连续内存区的内核描述，接下来看看它的分配和释放。

一、非连续内存区的分配

无论是vmalloc()还是vmalloc_32()等一系列分配函数，最终都会调用__vmalloc_node()函数来完成分配，下面直接看这个函数的实现。

/**
* __vmalloc_node - allocate virtually contiguous memory
* @size: allocation size
* @align: desired alignment
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
* @node: node to use for allocation or -1
* @caller: caller's return address
*
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	/* Size as requested by the caller, before rounding up to whole pages. */
	const unsigned long requested = size;
	struct vm_struct *vm;
	void *mapped;

	size = PAGE_ALIGN(size);

	/* Reject empty requests and requests larger than total RAM. */
	if (size == 0)
		return NULL;
	if ((size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	/* Reserve a range inside [VMALLOC_START, VMALLOC_END) and set up its descriptor. */
	vm = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
				VMALLOC_END, node, gfp_mask, caller);
	if (vm == NULL)
		return NULL;

	/* Allocate the physical pages and map them into the reserved range. */
	mapped = __vmalloc_area_node(vm, gfp_mask, prot, node, caller);

	/*
	 * Debugging aid. The count of 3 is used because the vm_struct and
	 * vmap_area structures created by __get_vm_area_node() also hold
	 * references to the virtual address of the vmalloc'ed block.
	 */
	kmemleak_alloc(mapped, requested, 3, gfp_mask);

	return mapped;
}
/**
* __vmalloc_node - allocate virtually contiguous memory
* @size: allocation size
* @align: desired alignment
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
* @node: node to use for allocation or -1
* @caller: caller's return address
*
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, void *caller)
{
struct vm_struct *area;
void *addr;
unsigned long real_size = size;
size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > totalram_pages)
return NULL;
/*分配相关的结构并对其初始化，在前面介绍过了*/
area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
VMALLOC_END, node, gfp_mask, caller);
if (!area)
return NULL;
/*分配物理空间，建立页表映射*/
addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
/*
* A ref_count = 3 is needed because the vm_struct and vmap_area
* structures allocated in the __get_vm_area_node() function contain
* references to the virtual address of the vmalloc'ed block.
*/
/*调试用*/
kmemleak_alloc(addr, real_size, 3, gfp_mask);
return addr;
}
struct page **pages;
unsigned int nr_pages, array_size, i;
/*需要减去一个页面，因为在分配结构的时候指定了多一个页面*/
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
/*页面指针所占空间大小*/
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {/*如果页面指针空间大于一个页面时，这个空间用非连续内存分配*/
pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
PAGE_KERNEL, node, caller);
area->flags |= VM_VPAGES;
} else {/*如果页面指针空间所占大小小于一个页面时，用slab机制分配这个空间*/
pages = kmalloc_node(array_size,
(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
node);
}
/*初始化area结构*/
area->pages = pages;
area->caller = caller;
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
return NULL;
}
/*对每个页面调用分配函数分配物理空间，
也就是每次分配一个页面*/
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
if (node < 0)/*分配物理页面空间*/
page = alloc_page(gfp_mask);
else
page = alloc_pages_node(node, gfp_mask, 0);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
area->pages[i] = page;/*初始化area中page数组*/
}
/*因为非连续区间没有建立页表机制，在这里需要建立他*/
if (map_vm_area(area, prot, &pages))
goto fail;
return area->addr;/*返回线性地址*/
fail:
vfree(area->addr);
return NULL;
}
struct page **pages;
unsigned int nr_pages, array_size, i;
/*需要减去一个页面，因为在分配结构的时候指定了多一个页面*/
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
/*页面指针所占空间大小*/
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {/*如果页面指针空间大于一个页面时，这个空间用非连续内存分配*/
pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
PAGE_KERNEL, node, caller);
area->flags |= VM_VPAGES;
} else {/*如果页面指针空间所占大小小于一个页面时，用slab机制分配这个空间*/
pages = kmalloc_node(array_size,
(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
node);
}
/*初始化area结构*/
area->pages = pages;
area->caller = caller;
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
return NULL;
}
/*对每个页面调用分配函数分配物理空间，
也就是每次分配一个页面*/
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
if (node < 0)/*分配物理页面空间*/
page = alloc_page(gfp_mask);
else
page = alloc_pages_node(node, gfp_mask, 0);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
area->pages[i] = page;/*初始化area中page数组*/
}
/*因为非连续区间没有建立页表机制，在这里需要建立他*/
if (map_vm_area(area, prot, &pages))
goto fail;
return area->addr;/*返回线性地址*/
fail:
vfree(area->addr);
return NULL;
}

复制代码

其中，map_vm_area()建立页表映射的过程，就是依次设置pgd、pud、pmd、pte各级页表项。

二、非连续内存区的释放

非连续内存区的释放通过调用vfree()函数实现。

/**
* vfree - release memory allocated by vmalloc()
* @addr: memory base address
*
* Free the virtually continuous memory area starting at @addr, as
* obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
* NULL, no operation is performed.
*
* Must not be called in interrupt context.
*/
void vfree(const void *addr)
{
	/* Passed as __vunmap()'s deallocate_pages: free the pages, not just the mapping. */
	const int free_backing_pages = 1;

	/* Calling this from interrupt context is a bug; stop immediately. */
	BUG_ON(in_interrupt());

	/* Debugging aid: report the free to kmemleak. */
	kmemleak_free(addr);

	/* The actual release work happens in __vunmap(). */
	__vunmap(addr, free_backing_pages);
}
/**
* vfree - release memory allocated by vmalloc()
* @addr: memory base address
*
* Free the virtually continuous memory area starting at @addr, as
* obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
* NULL, no operation is performed.
*
* Must not be called in interrupt context.
*/
void vfree(const void *addr)
{
BUG_ON(in_interrupt());
/*调试用*/
kmemleak_free(addr);
/*释放工作*/
__vunmap(addr, 1);
}
static void __vunmap(const void *addr, int deallocate_pages)
{
struct vm_struct *area;
if (!addr)
return;
if ((PAGE_SIZE-1) & (unsigned long)addr) {
WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
return;
}
/*从vlist链表和红黑树中移除指定地址的线性区间*/
area = remove_vm_area(addr);
if (unlikely(!area)) {
WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
addr);
return;
}
debug_check_no_locks_freed(addr, area->size);
debug_check_no_obj_freed(addr, area->size);
if (deallocate_pages) {
int i;
for (i = 0; i < area->nr_pages; i++) {/*每次释放一个页面*/
struct page *page = area->pages[i];
BUG_ON(!page);
__free_page(page);
}
if (area->flags & VM_VPAGES)/*在创建非连续区间时，如果页面
指针所占的空间大于一个页面时，从非连续内存区间
中分配。所以这里也就从相应的释放*/
vfree(area->pages);
else
kfree(area->pages);/*从slab中释放*/
}
kfree(area);/*释放area*/
return;
}
/*
 * __vunmap - release the vm area that starts at @addr
 * @addr: start address of the area; NULL is silently ignored
 * @deallocate_pages: when non-zero, also free every page recorded in
 *	area->pages and the page-pointer array itself
 *
 * Warns and returns without freeing anything if @addr is not page
 * aligned or if no vm area is found for it.
 */
static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	/* An address that is not page aligned is rejected outright. */
	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		return;
	}

	/* Detach the area for this address from the vmlist list and the red-black tree. */
	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		/* Unsigned to match the index used by the allocation loop over nr_pages. */
		unsigned int i;

		/* Give the pages back one at a time. */
		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		/*
		 * The page-pointer array came from the vmalloc area when it
		 * was larger than one page (VM_VPAGES set) and from the slab
		 * allocator otherwise; free it with the matching call.
		 */
		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	/* Finally release the area descriptor itself. */
	kfree(area);
}
/**
* remove_vm_area - find and remove a continuous kernel virtual area
* @addr: base address
*
* Search for the kernel VM area starting at @addr, and remove it.
* This function returns the found VM area, but using it is NOT safe
* on SMP machines, except for its size or flags.
*/
struct vm_struct *remove_vm_area(const void *addr)
{
	/* Looked up through the red-black tree rather than the list, for speed. */
	struct vmap_area *va = find_vmap_area((unsigned long)addr);
	struct vm_struct *vm;
	struct vm_struct **link;

	/* Nothing to do unless the address maps to a vmap_area flagged VM_VM_AREA. */
	if (!va || !(va->flags & VM_VM_AREA))
		return NULL;

	vm = va->private;

	/*
	 * Unlink the vm_struct from vmlist first, so it can no longer be
	 * reached through the list before the range is unmapped. (Address
	 * range conflicts are handled by vmap.)
	 */
	write_lock(&vmlist_lock);
	link = &vmlist;
	while (*link != vm)
		link = &(*link)->next;
	*link = vm->next;
	write_unlock(&vmlist_lock);

	/* Debugging aid. */
	vmap_debug_free_range(va->va_start, va->va_end);

	/* Remove the vmap_area from the red-black tree. */
	free_unmap_vmap_area(va);

	/* Report the size without the one extra page the area was created with. */
	vm->size -= PAGE_SIZE;
	return vm;
}
/**
* remove_vm_area - find and remove a continuous kernel virtual area
* @addr: base address
*
* Search for the kernel VM area starting at @addr, and remove it.
* This function returns the found VM area, but using it is NOT safe
* on SMP machines, except for its size or flags.
*/
struct vm_struct *remove_vm_area(const void *addr)
{
	/* Looked up through the red-black tree rather than the list, for speed. */
	struct vmap_area *va = find_vmap_area((unsigned long)addr);
	struct vm_struct *vm;
	struct vm_struct **link;

	/* Nothing to do unless the address maps to a vmap_area flagged VM_VM_AREA. */
	if (!va || !(va->flags & VM_VM_AREA))
		return NULL;

	vm = va->private;

	/*
	 * Unlink the vm_struct from vmlist first, so it can no longer be
	 * reached through the list before the range is unmapped. (Address
	 * range conflicts are handled by vmap.)
	 */
	write_lock(&vmlist_lock);
	link = &vmlist;
	while (*link != vm)
		link = &(*link)->next;
	*link = vm->next;
	write_unlock(&vmlist_lock);

	/* Debugging aid. */
	vmap_debug_free_range(va->va_start, va->va_end);

	/* Remove the vmap_area from the red-black tree. */
	free_unmap_vmap_area(va);

	/* Report the size without the one extra page the area was created with. */
	vm->size -= PAGE_SIZE;
	return vm;
}