summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2011-03-23 16:42:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-03-23 19:46:28 -0700
commit6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch)
tree6460e4e1ce206d391b862a3d398a9e22e33ecb3c /mm
parent5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff)
downloadlinux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.tar.gz
linux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.tar.bz2
linux-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.zip
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are reserved. Use the remaining upper bits to encode, depending on configuration, the node or the section, to enable page_cgroup-to-page lookups without a direct pointer. This saves a full word for every page in a system with memory cgroups enabled. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/page_cgroup.c91
2 files changed, 57 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e286e1603e4f..660dfc27d971 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
if (unlikely(!PageCgroupUsed(pc)))
continue;
- page = pc->page;
+ page = lookup_cgroup_page(pc);
if (unlikely(!PageLRU(page)))
continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
}
spin_unlock_irqrestore(&zone->lru_lock, flags);
- page = pc->page;
+ page = lookup_cgroup_page(pc);
ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
if (ret == -ENOMEM)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 59a3cd4c799d..6c3f7a6a481a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
#include <linux/swapops.h>
#include <linux/kmemleak.h>
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
{
pc->flags = 0;
+ set_page_cgroup_array_id(pc, id);
pc->mem_cgroup = NULL;
- pc->page = pfn_to_page(pfn);
INIT_LIST_HEAD(&pc->lru);
}
static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
return base + offset;
}
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+ unsigned long pfn;
+ struct page *page;
+ pg_data_t *pgdat;
+
+ pgdat = NODE_DATA(page_cgroup_array_id(pc));
+ pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+ page = pfn_to_page(pfn);
+ VM_BUG_ON(pc != lookup_page_cgroup(page));
+ return page;
+}
+
static int __init alloc_node_page_cgroup(int nid)
{
struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
return -ENOMEM;
for (index = 0; index < nr_pages; index++) {
pc = base + index;
- __init_page_cgroup(pc, start_pfn + index);
+ init_page_cgroup(pc, nid);
}
NODE_DATA(nid)->node_page_cgroup = base;
total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
return section->page_cgroup + pfn;
}
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+ struct mem_section *section;
+ struct page *page;
+ unsigned long nr;
+
+ nr = page_cgroup_array_id(pc);
+ section = __nr_to_section(nr);
+ page = pfn_to_page(pc - section->page_cgroup);
+ VM_BUG_ON(pc != lookup_page_cgroup(page));
+ return page;
+}
+
/* __alloc_bootmem...() is protected by !slab_available() */
static int __init_refok init_section_page_cgroup(unsigned long pfn)
{
- struct mem_section *section = __pfn_to_section(pfn);
struct page_cgroup *base, *pc;
+ struct mem_section *section;
unsigned long table_size;
+ unsigned long nr;
int nid, index;
- if (!section->page_cgroup) {
- nid = page_to_nid(pfn_to_page(pfn));
- table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
- VM_BUG_ON(!slab_is_available());
- if (node_state(nid, N_HIGH_MEMORY)) {
- base = kmalloc_node(table_size,
- GFP_KERNEL | __GFP_NOWARN, nid);
- if (!base)
- base = vmalloc_node(table_size, nid);
- } else {
- base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
- if (!base)
- base = vmalloc(table_size);
- }
- /*
- * The value stored in section->page_cgroup is (base - pfn)
- * and it does not point to the memory block allocated above,
- * causing kmemleak false positives.
- */
- kmemleak_not_leak(base);
+ nr = pfn_to_section_nr(pfn);
+ section = __nr_to_section(nr);
+
+ if (section->page_cgroup)
+ return 0;
+
+ nid = page_to_nid(pfn_to_page(pfn));
+ table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+ VM_BUG_ON(!slab_is_available());
+ if (node_state(nid, N_HIGH_MEMORY)) {
+ base = kmalloc_node(table_size,
+ GFP_KERNEL | __GFP_NOWARN, nid);
+ if (!base)
+ base = vmalloc_node(table_size, nid);
} else {
- /*
- * We don't have to allocate page_cgroup again, but
- * address of memmap may be changed. So, we have to initialize
- * again.
- */
- base = section->page_cgroup + pfn;
- table_size = 0;
- /* check address of memmap is changed or not. */
- if (base->page == pfn_to_page(pfn))
- return 0;
+ base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!base)
+ base = vmalloc(table_size);
}
+ /*
+ * The value stored in section->page_cgroup is (base - pfn)
+ * and it does not point to the memory block allocated above,
+ * causing kmemleak false positives.
+ */
+ kmemleak_not_leak(base);
if (!base) {
printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
for (index = 0; index < PAGES_PER_SECTION; index++) {
pc = base + index;
- __init_page_cgroup(pc, pfn + index);
+ init_page_cgroup(pc, nr);
}
section->page_cgroup = base - pfn;