A question about HOT PAGE / COLD PAGE

#1 · Posted 2008-11-05 12:06
Hello everyone,
    I've recently run into a question about hot pages and cold pages in Linux. I searched the web for quite a while but couldn't find any material on them.
Could one of the experts here explain them in detail?
It's fairly urgent, so I'm waiting online for a reply. Thanks in advance!

#2 · Posted 2008-11-05 13:06
Please describe your problem in more detail.

#3 · Posted 2008-11-05 13:08
From:   Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
To:   BK Commits List:;
Subject:   [PATCH] hot-n-cold pages: page allocator core
Date:   Wed, 30 Oct 2002 23:35:53 +0000


ChangeSet 1.909, 2002/10/30 15:35:53-08:00, akpm@digeo.com

        [PATCH] hot-n-cold pages: page allocator core
       
        Hot/Cold pages and zone->lock amortisation


# This patch includes the following deltas:
#                   ChangeSet        1.908   -> 1.909  
#        include/linux/mmzone.h        1.28    -> 1.29   
#          include/linux/mm.h        1.89    -> 1.90   
#             mm/page_alloc.c        1.119   -> 1.120  
#         include/linux/gfp.h        1.8     -> 1.9   
#                   mm/swap.c        1.37    -> 1.38   
#

include/linux/gfp.h    |    7 +-
include/linux/mm.h     |    1
include/linux/mmzone.h |   17 +++++
mm/page_alloc.c        |  160 ++++++++++++++++++++++++++++++++++++++-----------
mm/swap.c              |    5 -
5 files changed, 151 insertions(+), 39 deletions(-)


diff -Nru a/include/linux/gfp.h b/include/linux/gfp.h
--- a/include/linux/gfp.h        Wed Oct 30 16:21:56 2002
+++ b/include/linux/gfp.h        Wed Oct 30 16:21:56 2002
@@ -17,6 +17,7 @@
#define __GFP_IO        0x40        /* Can start low memory physical IO? */
#define __GFP_HIGHIO        0x80        /* Can start high mem physical IO? */
#define __GFP_FS        0x100        /* Can call down to low-level FS? */
+#define __GFP_COLD        0x200        /* Cache-cold page required */

#define GFP_NOHIGHIO        (             __GFP_WAIT | __GFP_IO)
#define GFP_NOIO        (             __GFP_WAIT)
@@ -32,6 +33,7 @@

#define GFP_DMA                __GFP_DMA

+
/*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
@@ -77,11 +79,10 @@
#define __get_dma_pages(gfp_mask, order) \
                __get_free_pages((gfp_mask) | GFP_DMA,(order))

-/*
- * There is only one 'core' page-freeing function.
- */
extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+extern void FASTCALL(free_hot_page(struct page *page));
+extern void FASTCALL(free_cold_page(struct page *page));

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr),0)
diff -Nru a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h        Wed Oct 30 16:21:56 2002
+++ b/include/linux/mm.h        Wed Oct 30 16:21:56 2002
@@ -211,7 +211,6 @@
#define set_page_count(p,v)         atomic_set(&(p)->count, v)

extern void FASTCALL(__page_cache_release(struct page *));
-void FASTCALL(__free_pages_ok(struct page *page, unsigned int order));

static inline void put_page(struct page *page)
{
diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h        Wed Oct 30 16:21:56 2002
+++ b/include/linux/mmzone.h        Wed Oct 30 16:21:56 2002
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/cache.h>
+#include <linux/threads.h>
#include <asm/atomic.h>
#ifdef CONFIG_DISCONTIGMEM
#include <asm/numnodes.h>
@@ -46,6 +47,18 @@
#define ZONE_PADDING(name)
#endif

+struct per_cpu_pages {
+        int count;                /* number of pages in the list */
+        int low;                /* low watermark, refill needed */
+        int high;                /* high watermark, emptying needed */
+        int batch;                /* chunk size for buddy add/remove */
+        struct list_head list;        /* the list of pages */
+};
+
+struct per_cpu_pageset {
+        struct per_cpu_pages pcp[2];        /* 0: hot.  1: cold */
+} ____cacheline_aligned_in_smp;
+
/*
  * On machines where it is needed (eg PCs) we divide physical memory
  * into multiple physical zones. On a PC we have 3 zones:
@@ -106,6 +119,10 @@
        wait_queue_head_t        * wait_table;
        unsigned long                wait_table_size;
        unsigned long                wait_table_bits;
+
+        ZONE_PADDING(_pad3_)
+
+        struct per_cpu_pageset        pageset[NR_CPUS];

        /*
         * Discontig memory support fields.
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c        Wed Oct 30 16:21:56 2002
+++ b/mm/page_alloc.c        Wed Oct 30 16:21:56 2002
@@ -10,6 +10,8 @@
  *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
  *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
  *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
+ *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
+ *          (lots of bits borrowed from Ingo Molnar & Andrew Morton)
  */

#include <linux/config.h>
@@ -151,13 +153,14 @@
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free, or 0 for all on the list.
  */
-static void
+static int
free_pages_bulk(struct zone *zone, int count,
                struct list_head *list, unsigned int order)
{
        unsigned long mask, flags;
        struct free_area *area;
        struct page *base, *page = NULL;
+        int ret = 0;

        mask = (~0UL) << order;
        base = zone->zone_mem_map;
@@ -169,8 +172,10 @@
                list_del(&page->list);
                __free_pages_bulk(page, base, zone, area, mask, order);
                mod_page_state(pgfree, count<<order);
+                ret++;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
+        return ret;
}

void __free_pages_ok(struct page *page, unsigned int order)
@@ -201,14 +206,13 @@
                index += size;
                page += size;
        }
-        BUG_ON(bad_range(zone, page));
        return page;
}

/*
  * This page is about to be returned from the page allocator
  */
-static inline void prep_new_page(struct page *page)
+static void prep_new_page(struct page *page)
{
        if (        page->mapping ||
                page_mapped(page) ||
@@ -248,36 +252,17 @@
                        continue;

                page = list_entry(curr, struct page, list);
-                BUG_ON(bad_range(zone, page));
                list_del(curr);
                index = page - zone->zone_mem_map;
                if (current_order != MAX_ORDER-1)
                        MARK_USED(index, current_order, area);
                zone->free_pages -= 1UL << order;
-                page = expand(zone, page, index, order, current_order, area);
-                return page;
+                return expand(zone, page, index, order, current_order, area);
        }

        return NULL;
}

-/* Obtain a single element from the buddy allocator */
-static struct page *rmqueue(struct zone *zone, unsigned int order)
-{
-        unsigned long flags;
-        struct page *page;
-
-        spin_lock_irqsave(&zone->lock, flags);
-        page = __rmqueue(zone, order);
-        spin_unlock_irqrestore(&zone->lock, flags);
-
-        if (page != NULL) {
-                BUG_ON(bad_range(zone, page));
-                prep_new_page(page);
-        }
-        return page;
-}
-
/*
  * Obtain a specified number of elements from the buddy allocator, all under
  * a single hold of the lock, for efficiency.  Add them to the supplied list.
@@ -341,6 +326,72 @@
#endif /* CONFIG_SOFTWARE_SUSPEND */

/*
+ * Free a 0-order page
+ */
+static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
+static void free_hot_cold_page(struct page *page, int cold)
+{
+        struct zone *zone = page_zone(page);
+        struct per_cpu_pages *pcp;
+        unsigned long flags;
+
+        free_pages_check(__FUNCTION__, page);
+        pcp = &zone->pageset[get_cpu()].pcp[cold];
+        local_irq_save(flags);
+        if (pcp->count >= pcp->high)
+                pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+        list_add(&page->list, &pcp->list);
+        pcp->count++;
+        local_irq_restore(flags);
+        put_cpu();
+}
+
+void free_hot_page(struct page *page)
+{
+        free_hot_cold_page(page, 0);
+}
+       
+void free_cold_page(struct page *page)
+{
+        free_hot_cold_page(page, 1);
+}
+
+static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+{
+        unsigned long flags;
+        struct page *page = NULL;
+
+        if (order == 0) {
+                struct per_cpu_pages *pcp;
+
+                pcp = &zone->pageset[get_cpu()].pcp[cold];
+                local_irq_save(flags);
+                if (pcp->count <= pcp->low)
+                        pcp->count += rmqueue_bulk(zone, 0,
+                                                pcp->batch, &pcp->list);
+                if (pcp->count) {
+                        page = list_entry(pcp->list.next, struct page, list);
+                        list_del(&page->list);
+                        pcp->count--;
+                }
+                local_irq_restore(flags);
+                put_cpu();
+        }
+
+        if (page == NULL) {
+                spin_lock_irqsave(&zone->lock, flags);
+                page = __rmqueue(zone, order);
+                spin_unlock_irqrestore(&zone->lock, flags);
+        }
+
+        if (page != NULL) {
+                BUG_ON(bad_range(zone, page));
+                prep_new_page(page);
+        }
+        return page;
+}
+
+/*
  * This is the 'heart' of the zoned buddy allocator:
  */
struct page *
@@ -349,13 +400,18 @@
{
        unsigned long min;
        struct zone **zones, *classzone;
-        struct page * page;
+        struct page *page;
        int cflags;
        int i;
+        int cold;

        if (gfp_mask & __GFP_WAIT)
                might_sleep();

+        cold = 0;
+        if (gfp_mask & __GFP_COLD)
+                cold = 1;
+
        mod_page_state(pgalloc, 1<<order);

        zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
@@ -371,7 +427,7 @@
                /* the incremental min is allegedly to discourage fallback */
                min += z->pages_low;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                        page = rmqueue(z, order);
+                        page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
@@ -396,7 +452,7 @@
                        local_min >>= 2;
                min += local_min;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                        page = rmqueue(z, order);
+                        page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
@@ -410,7 +466,7 @@
                for (i = 0; zones[i] != NULL; i++) {
                        struct zone *z = zones[i];

-                        page = rmqueue(z, order);
+                        page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
@@ -440,7 +496,7 @@

                min += z->pages_min;
                if (z->free_pages > min || z->free_pages >= z->pages_high) {
-                        page = rmqueue(z, order);
+                        page = buffered_rmqueue(z, order, cold);
                        if (page)
                                return page;
                }
@@ -492,13 +548,17 @@
        int i = pagevec_count(pvec);

        while (--i >= 0)
-                __free_pages_ok(pvec->pages[i], 0);
+                free_hot_page(pvec->pages[i]);
}

void __free_pages(struct page *page, unsigned int order)
{
-        if (!PageReserved(page) && put_page_testzero(page))
-                __free_pages_ok(page, order);
+        if (!PageReserved(page) && put_page_testzero(page)) {
+                if (order == 0)
+                        free_hot_page(page);
+                else
+                        __free_pages_ok(page, order);
+        }
}

void free_pages(unsigned long addr, unsigned int order)
@@ -899,7 +959,7 @@
        unsigned long i, j;
        unsigned long local_offset;
        const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
-        int nid = pgdat->node_id;
+        int cpu, nid = pgdat->node_id;
        struct page *lmem_map = pgdat->node_mem_map;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;

@@ -911,13 +971,13 @@
                struct zone *zone = pgdat->node_zones + j;
                unsigned long mask;
                unsigned long size, realsize;
+                unsigned long batch;

                zone_table[nid * MAX_NR_ZONES + j] = zone;
                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];

-                printk("  %s zone: %lu pages\n", zone_names[j], realsize);
                zone->spanned_pages = size;
                zone->present_pages = realsize;
                zone->name = zone_names[j];
@@ -925,6 +985,40 @@
                spin_lock_init(&zone->lru_lock);
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;
+
+                /*
+                 * The per-cpu-pages pools are set to around 1000th of the
+                 * size of the zone.  But no more than 1/4 of a meg - there's
+                 * no point in going beyond the size of L2 cache.
+                 *
+                 * OK, so we don't know how big the cache is.  So guess.
+                 */
+                batch = zone->present_pages / 1024;
+                if (batch * PAGE_SIZE > 256 * 1024)
+                        batch = (256 * 1024) / PAGE_SIZE;
+                batch /= 4;                /* We effectively *= 4 below */
+                if (batch < 1)
+                        batch = 1;
+
+                for (cpu = 0; cpu < NR_CPUS; cpu++) {
+                        struct per_cpu_pages *pcp;
+
+                        pcp = &zone->pageset[cpu].pcp[0];        /* hot */
+                        pcp->count = 0;
+                        pcp->low = 2 * batch;
+                        pcp->high = 6 * batch;
+                        pcp->batch = 1 * batch;
+                        INIT_LIST_HEAD(&pcp->list);
+
+                        pcp = &zone->pageset[cpu].pcp[1];        /* cold */
+                        pcp->count = 0;
+                        pcp->low = 0;
+                        pcp->high = 2 * batch;
+                        pcp->batch = 1 * batch;
+                        INIT_LIST_HEAD(&pcp->list);
+                }
+                printk("  %s zone: %lu pages, LIFO batch:%lu\n",
+                                zone_names[j], realsize, batch);
                INIT_LIST_HEAD(&zone->active_list);
                INIT_LIST_HEAD(&zone->inactive_list);
                atomic_set(&zone->refill_counter, 0);
diff -Nru a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c        Wed Oct 30 16:21:56 2002
+++ b/mm/swap.c        Wed Oct 30 16:21:56 2002
@@ -69,7 +69,8 @@
}

/*
- * This path almost never happens - pages are normally freed via pagevecs.
+ * This path almost never happens for VM activity - pages are normally
+ * freed via pagevecs.  But it gets used by networking.
  */
void __page_cache_release(struct page *page)
{
@@ -83,7 +84,7 @@
                page = NULL;
        spin_unlock_irqrestore(&zone->lru_lock, flags);
        if (page)
-                __free_pages_ok(page, 0);
+                free_hot_page(page);
}

/*


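To make the per-CPU pool sizing concrete, here is a worked example of the batch calculation in the zone-initialisation hunk near the end of the page_alloc.c diff above. The numbers are illustrative choices of my own (a 1 GB zone with 4 KB pages), not figures from the patch:

/*
 * Illustrative numbers only: a 1 GB zone with PAGE_SIZE == 4096.
 *
 *   present_pages = 1 GB / 4 KB          = 262144 pages
 *   batch = 262144 / 1024                = 256    (about 1/1000 of the zone)
 *   256 * PAGE_SIZE = 1 MB > 256 KB, so apply the cap:
 *   batch = (256 * 1024) / 4096          = 64
 *   batch /= 4                           = 16     ("We effectively *= 4 below")
 *
 * Per-CPU hot list:  low = 2*batch = 32, high = 6*batch = 96, refilled/drained 16 pages at a time
 * Per-CPU cold list: low = 0,            high = 2*batch = 32, same batch of 16
 */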
Hope this helps!

#4 · Posted 2013-07-12 13:33
Hot/cold pages were introduced by a patch aimed at the cache behaviour of modern processors. Each CPU keeps two per-CPU lists of free pages: a hot list holding pages that are probably still in the processor's cache, and a cold list holding pages that are unlikely to be cached.
Because a cache-hot page is much faster to access than one that has to be fetched from main memory, the allocator normally hands out hot pages first; for some allocations, however, such as pages used for DMA, cache-hotness brings no benefit, so cold pages can be used instead.
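A minimal sketch of what this means for a caller, written in kernel-style C against the interface introduced by the patch above (the __GFP_COLD flag, the hot/cold per-CPU lists and free_hot_page() come from the patch; the wrapper functions and their names here are hypothetical):

#include <linux/mm.h>
#include <linux/gfp.h>

/*
 * The CPU is about to read or write this page, so a cache-hot page helps:
 * order-0 allocations without __GFP_COLD are served from the per-CPU hot
 * list (pcp[0]) in buffered_rmqueue().
 */
static struct page *grab_page_we_will_touch(void)
{
        return alloc_page(GFP_KERNEL);
}

/*
 * A page that a device will fill by DMA: the CPU will not touch its
 * contents, so ask for a cache-cold page (pcp[1]) and leave the hot
 * pages for callers that actually benefit from them.
 */
static struct page *grab_page_for_dma(void)
{
        return alloc_page(GFP_KERNEL | __GFP_COLD);
}

/*
 * Freeing an order-0 page: __free_pages() now routes it to free_hot_page(),
 * putting it back on the per-CPU hot list instead of taking zone->lock for
 * every single page.
 */
static void drop_page(struct page *page)
{
        __free_page(page);
}

The point of both lists is the same: order-0 allocations and frees go through a per-CPU LIFO list and only fall back to the buddy allocator, under zone->lock, in batches, which is the "zone->lock amortisation" mentioned in the changelog.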