diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2011-03-23 16:42:30 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 19:46:28 -0700 |
commit | 6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch) | |
tree | 6460e4e1ce206d391b862a3d398a9e22e33ecb3c /mm | |
parent | 5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff) | |
download | linux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.tar.gz linux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.tar.bz2 linux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.zip |
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved. Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.
This saves a full word for every page in a system with memory cgroups
enabled.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/page_cgroup.c | 91 |
2 files changed, 57 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e286e1603e4f..660dfc27d971 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, if (unlikely(!PageCgroupUsed(pc))) continue; - page = pc->page; + page = lookup_cgroup_page(pc); if (unlikely(!PageLRU(page))) continue; @@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, } spin_unlock_irqrestore(&zone->lru_lock, flags); - page = pc->page; + page = lookup_cgroup_page(pc); ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); if (ret == -ENOMEM) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 59a3cd4c799d..6c3f7a6a481a 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -11,12 +11,11 @@ #include <linux/swapops.h> #include <linux/kmemleak.h> -static void __meminit -__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) +static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) { pc->flags = 0; + set_page_cgroup_array_id(pc, id); pc->mem_cgroup = NULL; - pc->page = pfn_to_page(pfn); INIT_LIST_HEAD(&pc->lru); } static unsigned long total_usage; @@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return base + offset; } +struct page *lookup_cgroup_page(struct page_cgroup *pc) +{ + unsigned long pfn; + struct page *page; + pg_data_t *pgdat; + + pgdat = NODE_DATA(page_cgroup_array_id(pc)); + pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; + page = pfn_to_page(pfn); + VM_BUG_ON(pc != lookup_page_cgroup(page)); + return page; +} + static int __init alloc_node_page_cgroup(int nid) { struct page_cgroup *base, *pc; @@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid) return -ENOMEM; for (index = 0; index < nr_pages; index++) { pc = base + index; - __init_page_cgroup(pc, start_pfn + index); + init_page_cgroup(pc, nid); } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; @@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } +struct page *lookup_cgroup_page(struct page_cgroup *pc) +{ + struct mem_section *section; + struct page *page; + unsigned long nr; + + nr = page_cgroup_array_id(pc); + section = __nr_to_section(nr); + page = pfn_to_page(pc - section->page_cgroup); + VM_BUG_ON(pc != lookup_page_cgroup(page)); + return page; +} + /* __alloc_bootmem...() is protected by !slab_available() */ static int __init_refok init_section_page_cgroup(unsigned long pfn) { - struct mem_section *section = __pfn_to_section(pfn); struct page_cgroup *base, *pc; + struct mem_section *section; unsigned long table_size; + unsigned long nr; int nid, index; - if (!section->page_cgroup) { - nid = page_to_nid(pfn_to_page(pfn)); - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - VM_BUG_ON(!slab_is_available()); - if (node_state(nid, N_HIGH_MEMORY)) { - base = kmalloc_node(table_size, - GFP_KERNEL | __GFP_NOWARN, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); - if (!base) - base = vmalloc(table_size); - } - /* - * The value stored in section->page_cgroup is (base - pfn) - * and it does not point to the memory block allocated above, - * causing kmemleak false positives. - */ - kmemleak_not_leak(base); + nr = pfn_to_section_nr(pfn); + section = __nr_to_section(nr); + + if (section->page_cgroup) + return 0; + + nid = page_to_nid(pfn_to_page(pfn)); + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; + VM_BUG_ON(!slab_is_available()); + if (node_state(nid, N_HIGH_MEMORY)) { + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); + if (!base) + base = vmalloc_node(table_size, nid); } else { - /* - * We don't have to allocate page_cgroup again, but - * address of memmap may be changed. So, we have to initialize - * again. - */ - base = section->page_cgroup + pfn; - table_size = 0; - /* check address of memmap is changed or not. */ - if (base->page == pfn_to_page(pfn)) - return 0; + base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); + if (!base) + base = vmalloc(table_size); } + /* + * The value stored in section->page_cgroup is (base - pfn) + * and it does not point to the memory block allocated above, + * causing kmemleak false positives. + */ + kmemleak_not_leak(base); if (!base) { printk(KERN_ERR "page cgroup allocation failure\n"); @@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) for (index = 0; index < PAGES_PER_SECTION; index++) { pc = base + index; - __init_page_cgroup(pc, pfn + index); + init_page_cgroup(pc, nr); } section->page_cgroup = base - pfn; |