免费注册 查看新帖 |

Chinaunix

  平台 论坛 博客 文库
12
最近访问板块 发新帖
楼主: registcn
打印 上一主题 下一主题

[内存管理] 关于pagecache的来两个问题 [复制链接]

论坛徽章:
15
射手座
日期:2014-02-26 13:45:082015年迎新春徽章
日期:2015-03-04 09:54:452015年辞旧岁徽章
日期:2015-03-03 16:54:15羊年新春福章
日期:2015-02-26 08:47:552015年亚洲杯之卡塔尔
日期:2015-02-03 08:33:45射手座
日期:2014-12-31 08:36:51水瓶座
日期:2014-06-04 08:33:52天蝎座
日期:2014-05-14 14:30:41天秤座
日期:2014-04-21 08:37:08处女座
日期:2014-04-18 16:57:05戌狗
日期:2014-04-04 12:21:33技术图书徽章
日期:2014-03-25 09:00:29
11 [报告]
发表于 2014-05-14 14:34 |只看该作者
瀚海书香 发表于 2014-05-14 10:34
回复 8# humjb_1983
当时貌似是在LWN上看的,刚才找了找没找到。因为这个patch太老了,我的邮件列表里面也 ...

大概看了下这个补丁,只是页缓存功能的补丁。后面还差一个pagecache_limit_ignore_dirty,针对脏页缓存是否限制的补丁,不知瀚海兄还有?或者帮忙指条路,从哪儿找?我看mainline中应该是没有的~

论坛徽章:
6
金牛座
日期:2013-10-08 10:19:10技术图书徽章
日期:2013-10-14 16:24:09CU十二周年纪念徽章
日期:2013-10-24 15:41:34狮子座
日期:2013-11-24 19:26:19未羊
日期:2014-01-23 15:50:002015年亚洲杯之阿联酋
日期:2015-05-09 14:36:15
12 [报告]
发表于 2014-05-14 14:52 |只看该作者
本帖最后由 瀚海书香 于 2014-05-14 14:52 编辑

回复 11# humjb_1983

直接看SUSE 内核源代码的patch就可以的
pagecache_limit_ignore_dirty的patch:
  1. From: Kurt Garloff <garloff@suse.de>
  2. Subject: Make pagecache limit behavior w.r.t. dirty pages configurable
  3. References: FATE309111
  4. Patch-mainline: Never

  5. The last fixes to this patchset ensured that we don't end up calling
  6. shrink_page_cache() [from add_to_page_cache()] again and again without
  7. the ability to actually free something. For this reason we subtracted
  8. the dirty pages from the list of freeable unmapped pages in the
  9. calculation.

  10. With this additional patch, a new sysctl
  11. /proc/sys/vm/pagecache_limit_ignore_dirty
  12. is introduced. With the default setting (1), behavior does not change.
  13. When setting it to 0, we actually consider all of the dirty pages
  14. freeable -- we then allow for a third pass in shrink_page_cache, where
  15. we allow writing out pages (if the gfp_mask allows it).
  16. The value can be set to values above 1 as well; with the value set to 2,
  17. we consider half of the dirty pages freeable etc.

  18. Signed-off-by: Kurt Garloff <garloff@suse.de>

  19. Index: linux-3.0-SLE11-SP2-3.0/include/linux/swap.h
  20. ===================================================================
  21. --- linux-3.0-SLE11-SP2-3.0.orig/include/linux/swap.h
  22. +++ linux-3.0-SLE11-SP2-3.0/include/linux/swap.h
  23. @@ -266,6 +266,7 @@ extern int vm_swappiness;
  24. extern unsigned long pagecache_over_limit(void);
  25. extern void shrink_page_cache(gfp_t mask, struct page *page);
  26. extern unsigned int vm_pagecache_limit_mb;
  27. +extern unsigned int vm_pagecache_ignore_dirty;
  28. extern int remove_mapping(struct address_space *mapping, struct page *page);
  29. extern long vm_total_pages;

  30. Index: linux-3.0-SLE11-SP2-3.0/kernel/sysctl.c
  31. ===================================================================
  32. --- linux-3.0-SLE11-SP2-3.0.orig/kernel/sysctl.c
  33. +++ linux-3.0-SLE11-SP2-3.0/kernel/sysctl.c
  34. @@ -1133,6 +1133,13 @@ static struct ctl_table vm_table[] = {
  35.                 .mode                = 0644,
  36.                 .proc_handler        = &proc_dointvec,
  37.         },
  38. +        {
  39. +                .procname        = "pagecache_limit_ignore_dirty",
  40. +                .data                = &vm_pagecache_ignore_dirty,
  41. +                .maxlen                = sizeof(vm_pagecache_ignore_dirty),
  42. +                .mode                = 0644,
  43. +                .proc_handler        = &proc_dointvec,
  44. +        },
  45. #ifdef CONFIG_HUGETLB_PAGE
  46.         {
  47.                 .procname        = "nr_hugepages",
  48. Index: linux-3.0-SLE11-SP2-3.0/mm/vmscan.c
  49. ===================================================================
  50. --- linux-3.0-SLE11-SP2-3.0.orig/mm/vmscan.c
  51. +++ linux-3.0-SLE11-SP2-3.0/mm/vmscan.c
  52. @@ -150,6 +150,7 @@ struct scan_control {
  53.   */
  54. int vm_swappiness __read_mostly = 60;
  55. unsigned int vm_pagecache_limit_mb __read_mostly = 0;
  56. +unsigned int vm_pagecache_ignore_dirty __read_mostly = 1;
  57. long vm_total_pages __read_mostly;        /* The total number of pages which the VM controls */

  58. static LIST_HEAD(shrinker_list);
  59. @@ -3012,9 +3013,9 @@ static void __shrink_page_cache(gfp_t ma
  60.          * Shrink the LRU in 2 passes:
  61.          * 0 = Reclaim from inactive_list only (fast)
  62.          * 1 = Reclaim from active list but don't reclaim mapped (not that fast)
  63. -         * 2 = Reclaim from active list but don't reclaim mapped (2nd pass)
  64. +         * 2 = Same as 1, but may_writepage = 1 (only done if we can and need it)
  65.          */
  66. -        for (pass = 0; pass < 2; pass++) {
  67. +        for (pass = 0; pass < 3; pass++) {
  68.                 int prio;

  69.                 for (prio = DEF_PRIORITY; prio >= 0; prio--) {
  70. @@ -3036,6 +3037,13 @@ static void __shrink_page_cache(gfp_t ma
  71.                                 goto out;

  72.                 }
  73. +                if (pass == 1) {
  74. +                        if (vm_pagecache_ignore_dirty == 1 ||
  75. +                            (mask & (__GFP_IO | __GFP_FS)) != (__GFP_IO | __GFP_FS) )
  76. +                                break;
  77. +                        else
  78. +                                sc.may_writepage = 1;
  79. +                }
  80.         }

  81. out:
  82. Index: linux-3.0-SLE11-SP2-3.0/mm/page_alloc.c
  83. ===================================================================
  84. --- linux-3.0-SLE11-SP2-3.0.orig/mm/page_alloc.c
  85. +++ linux-3.0-SLE11-SP2-3.0/mm/page_alloc.c
  86. @@ -5623,13 +5623,15 @@ unsigned long pagecache_over_limit()
  87.          * minus the dirty ones. (FIXME: pages accounted for in NR_WRITEBACK
  88.          * are not on the LRU lists  any more, right?) */
  89.         unsigned long pgcache_lru_pages = global_page_state(NR_ACTIVE_FILE)
  90. -                                        + global_page_state(NR_INACTIVE_FILE)
  91. -                                        - global_page_state(NR_FILE_DIRTY);
  92. +                                        + global_page_state(NR_INACTIVE_FILE);
  93.         unsigned long free_pages = global_page_state(NR_FREE_PAGES);
  94.         /* In theory, we'd need to take the swap lock here ... */
  95.         unsigned long swap_pages = total_swap_pages - nr_swap_pages;
  96.         unsigned long limit;

  97. +        if (vm_pagecache_ignore_dirty != 0)
  98. +                pgcache_lru_pages -= global_page_state(NR_FILE_DIRTY)
  99. +                                     /vm_pagecache_ignore_dirty;
  100.         /* Paranoia */
  101.         if (unlikely(pgcache_lru_pages > LONG_MAX))
  102.                 return 0;
  103. Index: linux-3.0-SLE11-SP2-3.0/Documentation/vm/pagecache-limit
  104. ===================================================================
  105. --- linux-3.0-SLE11-SP2-3.0.orig/Documentation/vm/pagecache-limit
  106. +++ linux-3.0-SLE11-SP2-3.0/Documentation/vm/pagecache-limit
  107. @@ -1,6 +1,6 @@
  108. Functionality:
  109. -------------
  110. -The patch introduces a new tunable in the proc filesystem:
  111. +The patch introduces two new tunables in the proc filesystem:

  112. /proc/sys/vm/pagecache_limit_mb

  113. @@ -15,6 +15,13 @@ As we only consider pagecache pages that
  114. NOTE: The real limit depends on the amount of free memory. Every existing free page allows the page cache to grow 8x the amount of free memory above the set baseline. As soon as the free memory is needed, we free up page cache.


  115. +/proc/sys/vm/pagecache_limit_ignore_dirty
  116. +
  117. +The default for this setting is 1; this means that we don't consider dirty memory to be part of the limited pagecache, as we cannot easily free up dirty memory (we'd need to do writes for this). By setting this to 0, we actually consider dirty (unmapped) memory to be freeable and do a third pass in shrink_page_cache() where we schedule the pages for writeout. Values larger than 1 are also possible and result in a fraction of the dirty pages being considered non-freeable.
  118. +
  119. +
  120. +
  121. +
  122. How it works:
  123. ------------
  124. The heart of this patch is a new function called shrink_page_cache(). It is called from balance_pgdat (which is the worker for kswapd) if the pagecache is above the limit.
  125. @@ -27,7 +34,9 @@ shrink_page_cache does several passes:
  126.    This is fast -- but it might not find enough free pages; if that happens,
  127.    the second pass will happen
  128. - In the second pass, pages from active list will also be considered.
  129. -- The third pass is just another round of the second pass
  130. +- The third pass will only happen if pagecache_limit_ignore_dirty is not 1.
  131. +  In that case, the third pass is a repetition of the second pass, but this
  132. +  time we allow pages to be written out.

  133. In all passes, only unmapped pages will be considered.

  134. Index: linux-3.0-SLE11-SP2-3.0/mm/filemap.c
  135. ===================================================================
  136. --- linux-3.0-SLE11-SP2-3.0.orig/mm/filemap.c
  137. +++ linux-3.0-SLE11-SP2-3.0/mm/filemap.c
  138. @@ -509,6 +509,11 @@ int add_to_page_cache(struct page *page,

  139.         if (unlikely(vm_pagecache_limit_mb) && pagecache_over_limit() > 0)
  140.                 shrink_page_cache(gfp_mask, page);
  141. +        /* FIXME: If we add dirty pages to pagecache here, and we call
  142. +         * shrink_page_cache(), it might need to write out some pages to
  143. +         * keep us below the set pagecache limit -- in order for that to
  144. +         * be successful, we might need to throttle here and do some
  145. +         * congestion_wait(BLK_RW_ASYNC, HZ/10) here. */

  146.         __set_page_locked(page);
  147.         error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
复制代码

论坛徽章:
15
射手座
日期:2014-02-26 13:45:082015年迎新春徽章
日期:2015-03-04 09:54:452015年辞旧岁徽章
日期:2015-03-03 16:54:15羊年新春福章
日期:2015-02-26 08:47:552015年亚洲杯之卡塔尔
日期:2015-02-03 08:33:45射手座
日期:2014-12-31 08:36:51水瓶座
日期:2014-06-04 08:33:52天蝎座
日期:2014-05-14 14:30:41天秤座
日期:2014-04-21 08:37:08处女座
日期:2014-04-18 16:57:05戌狗
日期:2014-04-04 12:21:33技术图书徽章
日期:2014-03-25 09:00:29
13 [报告]
发表于 2014-05-14 15:25 |只看该作者
瀚海书香 发表于 2014-05-14 14:52
回复 11# humjb_1983

直接看SUSE 内核源代码的patch就可以的

太感谢了~,suse的内核代码中还有单独的patch?
redhat太不地道了,从rhel6之后,就没有单独的patch了。

谢谢!

论坛徽章:
6
金牛座
日期:2013-10-08 10:19:10技术图书徽章
日期:2013-10-14 16:24:09CU十二周年纪念徽章
日期:2013-10-24 15:41:34狮子座
日期:2013-11-24 19:26:19未羊
日期:2014-01-23 15:50:002015年亚洲杯之阿联酋
日期:2015-05-09 14:36:15
14 [报告]
发表于 2014-05-14 15:34 |只看该作者
回复 13# humjb_1983
太感谢了~,suse的内核代码中还有单独的patch?
redhat太不地道了,从rhel6之后,就没有单独的patch了。


SLES 11 SP2 3.0.80内核总共有1472个patch

   

论坛徽章:
15
射手座
日期:2014-02-26 13:45:082015年迎新春徽章
日期:2015-03-04 09:54:452015年辞旧岁徽章
日期:2015-03-03 16:54:15羊年新春福章
日期:2015-02-26 08:47:552015年亚洲杯之卡塔尔
日期:2015-02-03 08:33:45射手座
日期:2014-12-31 08:36:51水瓶座
日期:2014-06-04 08:33:52天蝎座
日期:2014-05-14 14:30:41天秤座
日期:2014-04-21 08:37:08处女座
日期:2014-04-18 16:57:05戌狗
日期:2014-04-04 12:21:33技术图书徽章
日期:2014-03-25 09:00:29
15 [报告]
发表于 2014-05-15 18:42 |只看该作者
瀚海书香 发表于 2014-05-14 15:34
回复 13# humjb_1983

感谢感谢~~,看来suse还比较厚道,有补丁要好多了~
您需要登录后才可以回帖 登录 | 注册

本版积分规则 发表回复

  

北京盛拓优讯信息技术有限公司. 版权所有 京ICP备16024965号-6 北京市公安局海淀分局网监中心备案编号:11010802020122 niuxiaotong@pcpop.com 17352615567
未成年举报专区
中国互联网协会会员  联系我们:huangweiwei@itpub.net
感谢所有关心和支持过ChinaUnix的朋友们 转载本站内容请注明原作者名及出处

清除 Cookies - ChinaUnix - Archiver - WAP - TOP