From 93b4796dede916de74b21fbd637588da6a99a7ec Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 12 Dec 2012 13:50:54 -0800
Subject: thp: do_huge_pmd_wp_page(): handle huge zero page

On write access to the huge zero page we allocate a new huge page and
clear it. If that allocation fails (ENOMEM), we fall back gracefully:
we create a new pmd table, set the pte around the fault address to a
newly allocated normal (4k) page, and set all other ptes in the pmd to
the normal zero page.

Signed-off-by: Kirill A. Shutemov
Cc: Andrea Arcangeli
Cc: Andi Kleen
Cc: "H. Peter Anvin"
Cc: Mel Gorman
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)
(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b1be4c..4b80bad4a367 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -516,6 +516,14 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 #endif
 
+#ifndef my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+        extern unsigned long zero_pfn;
+        return zero_pfn;
+}
+#endif
+
 /*
  * Multiple processes may "see" the same page.  E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
--
cgit v1.2.3
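The behavior this patch completes can be observed from userspace. Below is a
minimal demonstration sketch, assuming a kernel with this series applied and
a 2MB huge page size on x86-64; it is illustrative and not part of the patch:

    /* hzp_cow_demo.c - a read fault may map the huge zero page;
     * the first write then triggers do_huge_pmd_wp_page(). */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    #define HPAGE_SIZE (2UL * 1024 * 1024)

    int main(void)
    {
            char *p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED)
                    return 1;
            madvise(p, HPAGE_SIZE, MADV_HUGEPAGE);

            char c = p[0];  /* read fault: may be served by the huge zero page */

            /* Write fault: the kernel allocates and clears a real huge page,
             * or on ENOMEM falls back to one normal 4k page plus
             * zero-page ptes, as described above. */
            p[0] = 1;

            printf("read %d, wrote %d\n", c, p[0]);
            munmap(p, HPAGE_SIZE);
            return 0;
    }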
From e180377f1ae48b3cbc559c9875d9b038f7f000c6 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 12 Dec 2012 13:50:59 -0800
Subject: thp: change split_huge_page_pmd() interface

Pass the vma instead of the mm, and add an address parameter. In most
cases we already have the vma on the stack. We provide
split_huge_page_pmd_mm() for the few cases when we have the mm but not
the vma.

This change is preparation for the huge zero pmd splitting
implementation.

Signed-off-by: Kirill A. Shutemov
Cc: Andrea Arcangeli
Cc: Andi Kleen
Cc: "H. Peter Anvin"
Cc: Mel Gorman
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/huge_mm.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1af477552459..3132ea788581 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -95,12 +95,14 @@ extern int handle_pte_fault(struct mm_struct *mm,
                        struct vm_area_struct *vma, unsigned long address,
                        pte_t *pte, pmd_t *pmd, unsigned int flags);
 extern int split_huge_page(struct page *page);
-extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
-#define split_huge_page_pmd(__mm, __pmd)                       \
+extern void __split_huge_page_pmd(struct vm_area_struct *vma,
+               unsigned long address, pmd_t *pmd);
+#define split_huge_page_pmd(__vma, __address, __pmd)           \
        do {                                                    \
                pmd_t *____pmd = (__pmd);                       \
                if (unlikely(pmd_trans_huge(*____pmd)))         \
-                       __split_huge_page_pmd(__mm, ____pmd);   \
+                       __split_huge_page_pmd(__vma, __address, \
+                                       ____pmd);               \
        } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)                \
        do {                                                    \
@@ -110,6 +112,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
                BUG_ON(pmd_trans_splitting(*____pmd) ||         \
                       pmd_trans_huge(*____pmd));               \
        } while (0)
+extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
+                                   pmd_t *pmd);
 #if HPAGE_PMD_ORDER > MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
@@ -177,10 +181,12 @@ static inline int split_huge_page(struct page *page)
 {
        return 0;
 }
-#define split_huge_page_pmd(__mm, __pmd)       \
+#define split_huge_page_pmd(__vma, __address, __pmd)   \
        do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)        \
        do { } while (0)
+#define split_huge_page_pmd_mm(__mm, __address, __pmd) \
+       do { } while (0)
 #define compound_trans_head(page) compound_head(page)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
                                   unsigned long *vm_flags, int advice)
--
cgit v1.2.3


From d8a8e1f0da3d29d7268b3300c96a059d63901b76 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 12 Dec 2012 13:51:09 -0800
Subject: thp, vmstat: implement HZP_ALLOC and HZP_ALLOC_FAILED events

hzp_alloc is incremented every time a huge zero page is successfully
allocated. It includes allocations which were dropped due to a race
with another allocation. Note that it doesn't count every map of the
huge zero page, only its allocation.

hzp_alloc_failed is incremented if the kernel fails to allocate a huge
zero page and falls back to using small pages.

Signed-off-by: Kirill A. Shutemov
Cc: Andrea Arcangeli
Cc: Andi Kleen
Cc: "H. Peter Anvin"
Cc: Mel Gorman
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/vm_event_item.h | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'include')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3d3114594370..fe786f07d2bd 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -58,6 +58,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                THP_COLLAPSE_ALLOC,
                THP_COLLAPSE_ALLOC_FAILED,
                THP_SPLIT,
+               THP_ZERO_PAGE_ALLOC,
+               THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
                NR_VM_EVENT_ITEMS
 };
--
cgit v1.2.3
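Once this lands, the two counters appear in /proc/vmstat as
thp_zero_page_alloc and thp_zero_page_alloc_failed (the names here are
inferred from the usual lowercase vmstat_text convention in mm/vmstat.c).
A small reader sketch:

    /* vmstat_hzp.c - print the huge-zero-page counters, if present. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/vmstat", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    /* matches both thp_zero_page_alloc and ..._failed */
                    if (!strncmp(line, "thp_zero_page_alloc", 19))
                            fputs(line, stdout);
            fclose(f);
            return 0;
    }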
From 79da5407eeadc740fbf4b45d6df7d7f8e6adaf2c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 12 Dec 2012 13:51:12 -0800
Subject: thp: introduce sysfs knob to disable huge zero page

By default the kernel tries to use the huge zero page on read page
faults. It's possible to disable the huge zero page by writing 0, or
enable it back by writing 1:

echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page

Signed-off-by: Kirill A. Shutemov
Cc: Andrea Arcangeli
Cc: Andi Kleen
Cc: "H. Peter Anvin"
Cc: Mel Gorman
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/huge_mm.h | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3132ea788581..092dc5305a32 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ enum transparent_hugepage_flag {
        TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+       TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
 #ifdef CONFIG_DEBUG_VM
        TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
@@ -78,6 +79,9 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
          (transparent_hugepage_flags &                         \
           (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) &&    \
           (__vma)->vm_flags & VM_HUGEPAGE))
+#define transparent_hugepage_use_zero_page()                   \
+       (transparent_hugepage_flags &                           \
+        (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
 #ifdef CONFIG_DEBUG_VM
 #define transparent_hugepage_debug_cow()                       \
        (transparent_hugepage_flags &                           \
--
cgit v1.2.3
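For completeness, a C equivalent of the echo commands above — a sketch that
merely reports the knob's current state, assuming a kernel with this patch
applied:

    /* use_zero_page_query.c - report whether the huge zero page is enabled. */
    #include <stdio.h>

    int main(void)
    {
            char buf[4] = "";
            FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/use_zero_page",
                            "r");

            if (!f) {
                    perror("use_zero_page");  /* older kernel, or THP disabled */
                    return 1;
            }
            if (fgets(buf, sizeof(buf), f))
                    printf("huge zero page enabled: %s", buf);
            fclose(f);
            return 0;
    }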
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:21 -0800
Subject: mm: node_states: introduce N_MEMORY

We have N_NORMAL_MEMORY standing for the nodes that have normal memory
with zone_type <= ZONE_NORMAL, and N_HIGH_MEMORY standing for the nodes
that have normal or high memory. But we don't have any state standing
for the nodes that have *any* memory, and we have N_CPU but no
N_MEMORY.

Current code reuses N_HIGH_MEMORY for this purpose, because currently
any node which has memory must have high memory or normal memory.

A) This reuse is bad for *readability*, because the name N_HIGH_MEMORY
just stands for high or normal:

A.example 1) mem_cgroup_nr_lru_pages():
        for_each_node_state(nid, N_HIGH_MEMORY)

The user will be confused (why does this function only count high or
normal memory nodes? does it count ZONE_MOVABLE's lru pages?) until
someone tells them that N_HIGH_MEMORY is reused to stand for nodes that
have any memory.

A.cont) If we introduce N_MEMORY, we can reduce this confusion AND make
the code clearer:

A.example 2) mm/page_cgroup.c uses N_HIGH_MEMORY twice.

The first use is in page_cgroup_init(void):
        for_each_node_state(nid, N_HIGH_MEMORY) {

It means: if the node has memory, we allocate a page_cgroup map for the
node. We should use N_MEMORY here instead to make that clear.

The second use is in alloc_page_cgroup():
        if (node_state(nid, N_HIGH_MEMORY))
                addr = vzalloc_node(size, nid);

It means: the node has high or normal memory that the kernel can
allocate from. We should keep N_HIGH_MEMORY here, and it will be better
once the "any memory" semantic of N_HIGH_MEMORY is removed.

B) This reuse is outdated if we introduce a MOVABLE-dedicated node. A
MOVABLE-dedicated node should appear in neither
node_states[N_HIGH_MEMORY] nor node_states[N_NORMAL_MEMORY], because it
has no high or normal memory.

On x86_64, N_HIGH_MEMORY == N_NORMAL_MEMORY, so if a MOVABLE-dedicated
node is in node_states[N_HIGH_MEMORY], it is also in
node_states[N_NORMAL_MEMORY], which breaks SLUB: SLUB uses
for_each_node_state(nid, N_NORMAL_MEMORY) and would create a
kmem_cache_node for the MOVABLE-dedicated node, causing problems.

In one word, we need N_MEMORY. We just introduce it here as an alias of
N_HIGH_MEMORY, and fix all improper usages of N_HIGH_MEMORY in later
patches.

Signed-off-by: Lai Jiangshan
Acked-by: Christoph Lameter
Acked-by: Hillf Danton
Signed-off-by: Wen Congyang
Cc: Lin Feng
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/nodemask.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 7afc36334d52..c6ebdc97a428 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -380,6 +380,7 @@ enum node_states {
 #else
        N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
+       N_MEMORY = N_HIGH_MEMORY,
        N_CPU,          /* The node has one or more cpus */
        NR_NODE_STATES
 };
--
cgit v1.2.3


From 38d7bee9d24adf4c95676a3dc902827c72930ebb Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:24 -0800
Subject: cpuset: use N_MEMORY instead of N_HIGH_MEMORY

N_HIGH_MEMORY stands for the nodes that have normal or high memory.
N_MEMORY stands for the nodes that have any memory. The code here needs
to handle the nodes which have memory, so we should use N_MEMORY
instead.

Signed-off-by: Lai Jiangshan
Acked-by: Hillf Danton
Signed-off-by: Wen Congyang
Cc: Christoph Lameter
Cc: Lin Feng
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/cpuset.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 838320fc3d1d..8c8a60d29407 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -144,7 +144,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
        return node_possible_map;
 }
 
-#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
+#define cpuset_current_mems_allowed (node_states[N_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 
 static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
--
cgit v1.2.3


From 6715ddf94576ff39f5d1cda8d4c568d3b79e82ec Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:51:49 -0800
Subject: hotplug: update nodemasks management

Update nodemask management for N_MEMORY.

[lliubbo@gmail.com: fix build]
Signed-off-by: Lai Jiangshan
Signed-off-by: Wen Congyang
Cc: Christoph Lameter
Cc: Hillf Danton
Cc: Lin Feng
Cc: David Rientjes
Signed-off-by: Bob Liu
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index a09216d0dcc7..45e93b468878 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -54,6 +54,7 @@ struct memory_notify {
        unsigned long start_pfn;
        unsigned long nr_pages;
        int status_change_nid_normal;
+       int status_change_nid_high;
        int status_change_nid;
 };
--
cgit v1.2.3
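The three patches above all revolve around the new N_MEMORY state. To see
why introducing it as a plain alias keeps existing users working, here is a
toy userspace model — the names mirror the kernel's, but the mask storage is
simplified and hypothetical:

    /* node_states_model.c - toy model of the N_MEMORY aliasing trick.
     * The kernel's real node_states[] is a nodemask_t array. */
    #include <stdio.h>

    enum node_states {
            N_POSSIBLE,
            N_ONLINE,
            N_NORMAL_MEMORY,
            N_HIGH_MEMORY,             /* pretend CONFIG_HIGHMEM=y */
            N_MEMORY = N_HIGH_MEMORY,  /* the alias this series adds */
            N_CPU,
            NR_NODE_STATES
    };

    static unsigned long node_states[NR_NODE_STATES];  /* bit n = node n */

    int main(void)
    {
            node_states[N_HIGH_MEMORY] = 0x3;  /* nodes 0 and 1 have memory */

            /* Readers asking the clearer question get the same answer,
             * because both names index the same mask. */
            printf("N_MEMORY=%#lx N_HIGH_MEMORY=%#lx\n",
                   node_states[N_MEMORY], node_states[N_HIGH_MEMORY]);
            return 0;
    }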
From 44e33e8f95171b93968c3ac3cf8516998b1db65d Mon Sep 17 00:00:00 2001
From: Greg Thelen
Date: Wed, 12 Dec 2012 13:51:52 -0800
Subject: res_counter: delete res_counter_write()

Since commit 628f42355389 ("memcg: limit change shrink usage") both
res_counter_write() and write_strategy_fn have been unused. This patch
deletes them both.

Signed-off-by: Greg Thelen
Cc: Glauber Costa
Cc: Tejun Heo
Acked-by: KAMEZAWA Hiroyuki
Cc: Frederic Weisbecker
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/res_counter.h | 5 -----
 1 file changed, 5 deletions(-)
(limited to 'include')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 7d7fbe2ef782..6f54e40fa218 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -74,14 +74,9 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
                const char __user *buf, size_t nbytes, loff_t *pos,
                int (*read_strategy)(unsigned long long val, char *s));
 
-typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
-
 int res_counter_memparse_write_strategy(const char *buf,
                                        unsigned long long *res);
 
-int res_counter_write(struct res_counter *counter, int member,
-                     const char *buffer, write_strategy_fn write_strategy);
-
 /*
  * the field descriptors. one for each member of res_counter
  */
--
cgit v1.2.3


From 05b0afd73d04109d87f00ccd39f099e217c37263 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 12 Dec 2012 13:51:56 -0800
Subject: mm: add a reminder comment for __GFP_BITS_SHIFT

Cc: Glauber Costa
Cc: Mel Gorman
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/gfp.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 31e8041274f6..f74856e17e48 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -34,6 +34,7 @@ struct vm_area_struct;
 #define ___GFP_NO_KSWAPD       0x400000u
 #define ___GFP_OTHER_NODE      0x800000u
 #define ___GFP_WRITE           0x1000000u
+/* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
  * GFP bitmasks..
--
cgit v1.2.3
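The reminder comment encodes an implicit invariant: ___GFP_WRITE at
0x1000000u is bit 24, so __GFP_BITS_SHIFT must be at least 25. The same
invariant can be expressed as a standalone compile-time guard — the concrete
values below mirror gfp.h of this era and are assumptions, not part of the
patch:

    /* gfp_bits_check.c - compile-time guard for the gfp bit budget. */
    #define ___GFP_WRITE            0x1000000u  /* highest ___GFP_* bit: 24 */
    #define __GFP_BITS_SHIFT        25          /* must cover that bit */

    _Static_assert(___GFP_WRITE < (1u << __GFP_BITS_SHIFT),
                   "__GFP_BITS_SHIFT needs updating");

    int main(void) { return 0; }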
From 68ae564bbac8eb9ed54ddd2529b0e29ee190b355 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Wed, 12 Dec 2012 13:51:57 -0800
Subject: mm, memcg: avoid unnecessary function call when memcg is disabled

While profiling numa/core v16 with cgroup_disable=memory on the command
line, I noticed mem_cgroup_count_vm_event() still showed up as high as
0.60% in perftop. This occurs because the function is called extremely
often even when memcg is disabled.

To fix this, inline the check for mem_cgroup_disabled() so we avoid the
unnecessary function call if memcg is disabled.

Signed-off-by: David Rientjes
Acked-by: KAMEZAWA Hiroyuki
Acked-by: Glauber Costa
Acked-by: Michal Hocko
Acked-by: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memcontrol.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 11ddc7ffeba8..e98a74c0c9c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -181,7 +181,14 @@ unsigned long
 mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
+                                            enum vm_event_item idx)
+{
+       if (mem_cgroup_disabled())
+               return;
+       __mem_cgroup_count_vm_event(mm, idx);
+}
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
--
cgit v1.2.3


From 20b2f52b73febce476fc9376f0296c1aa0e4f5a7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Wed, 12 Dec 2012 13:52:00 -0800
Subject: numa: add CONFIG_MOVABLE_NODE for movable-dedicated node

We need a node which only contains movable memory. This feature is very
important for node hotplug. If a node has normal/highmem, the memory
may be used by the kernel and can't be offlined. If the node only
contains movable memory, we can offline the memory and the node.

Everything is prepared, so we can now actually introduce N_MEMORY: add
CONFIG_MOVABLE_NODE so that N_MEMORY can be used for movable-dedicated
nodes.

[akpm@linux-foundation.org: fix Kconfig text]
Signed-off-by: Lai Jiangshan
Tested-by: Yasuaki Ishimatsu
Signed-off-by: Wen Congyang
Cc: Jiang Liu
Cc: KOSAKI Motohiro
Cc: Minchan Kim
Cc: Mel Gorman
Cc: David Rientjes
Cc: Yinghai Lu
Cc: Rusty Russell
Cc: Greg KH
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/nodemask.h | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'include')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index c6ebdc97a428..4e2cbfa640b7 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -380,7 +380,11 @@ enum node_states {
 #else
        N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+       N_MEMORY,               /* The node has memory(regular, high, movable) */
+#else
        N_MEMORY = N_HIGH_MEMORY,
+#endif
        N_CPU,          /* The node has one or more cpus */
        NR_NODE_STATES
 };
--
cgit v1.2.3
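The mem_cgroup_count_vm_event() change above is an instance of a general
pattern: keep the cheap predicate in an inline header wrapper so the common
disabled case costs a predictable branch rather than a function call. A
self-contained sketch of the same shape, with illustrative names:

    /* inline_fastpath.c - sketch of the inline-wrapper fast path. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool memcg_disabled = true;  /* stands in for mem_cgroup_disabled() */

    static void __count_event(int idx)  /* out-of-line slow path */
    {
            printf("counted event %d\n", idx);
    }

    static inline void count_event(int idx)
    {
            if (memcg_disabled)         /* inlined check: no call when disabled */
                    return;
            __count_event(idx);
    }

    int main(void)
    {
            count_event(1);             /* fast path: no call, no output */
            memcg_disabled = false;
            count_event(2);             /* slow path taken */
            return 0;
    }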
From 9feedc9d831e18ae6d0d15aa562e5e46ba53647b Mon Sep 17 00:00:00 2001
From: Jiang Liu
Date: Wed, 12 Dec 2012 13:52:12 -0800
Subject: mm: introduce new field "managed_pages" to struct zone

Currently a zone's present_pages is calculated as below, which is
inaccurate and may cause trouble for memory hotplug:

        spanned_pages - absent_pages - memmap_pages - dma_reserve

While fixing bugs caused by the inaccurate zone->present_pages, we
found that zone->present_pages has been abused. The field may have
different meanings in different contexts:

1) pages existing in a zone.
2) pages managed by the buddy system.

For more discussion of the issue, please refer to:
  http://lkml.org/lkml/2012/11/5/866
  https://patchwork.kernel.org/patch/1346751/

This patchset introduces a new field named "managed_pages" to struct
zone, which counts "pages managed by the buddy system", and reverts
zone->present_pages to counting "physical pages existing in a zone",
which also keeps it consistent with pgdat->node_present_pages.

We set an initial value for zone->managed_pages in
free_area_init_core() and adjust it later if the initial value is
inaccurate.

For DMA/normal zones, the initial value is set to:

        (spanned_pages - absent_pages - memmap_pages - dma_reserve)

Later zone->managed_pages will be adjusted to the accurate value when
the bootmem allocator frees all free pages to the buddy system in
free_all_bootmem_node() and free_all_bootmem().

The bootmem allocator doesn't touch highmem pages, so highmem zones'
managed_pages is set to the accurate value
"spanned_pages - absent_pages" in free_area_init_core() and won't be
updated anymore.

This patch also adds the new field "managed_pages" to /proc/zoneinfo
and sysrq showmem.

[akpm@linux-foundation.org: small comment tweaks]
Signed-off-by: Jiang Liu
Cc: Wen Congyang
Cc: David Rientjes
Cc: Maciej Rutecki
Tested-by: Chris Clayton
Cc: "Rafael J. Wysocki"
Cc: Mel Gorman
Cc: Minchan Kim
Cc: KAMEZAWA Hiroyuki
Cc: Michal Hocko
Cc: Jianguo Wu
Cc: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mmzone.h | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)
(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0c0b1d608a69..cd55dad56aac 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -460,17 +460,44 @@ struct zone {
        unsigned long           zone_start_pfn;
 
        /*
-        * zone_start_pfn, spanned_pages and present_pages are all
-        * protected by span_seqlock.  It is a seqlock because it has
-        * to be read outside of zone->lock, and it is done in the main
-        * allocator path.  But, it is written quite infrequently.
+        * spanned_pages is the total pages spanned by the zone, including
+        * holes, which is calculated as:
+        *      spanned_pages = zone_end_pfn - zone_start_pfn;
         *
-        * The lock is declared along with zone->lock because it is
+        * present_pages is physical pages existing within the zone, which
+        * is calculated as:
+        *      present_pages = spanned_pages - absent_pages(pages in holes);
+        *
+        * managed_pages is present pages managed by the buddy system, which
+        * is calculated as (reserved_pages includes pages allocated by the
+        * bootmem allocator):
+        *      managed_pages = present_pages - reserved_pages;
+        *
+        * So present_pages may be used by memory hotplug or memory power
+        * management logic to figure out unmanaged pages by checking
+        * (present_pages - managed_pages). And managed_pages should be used
+        * by page allocator and vm scanner to calculate all kinds of
+        * watermarks and thresholds.
+        *
+        * Locking rules:
+        *
+        * zone_start_pfn and spanned_pages are protected by span_seqlock.
+        * It is a seqlock because it has to be read outside of zone->lock,
+        * and it is done in the main allocator path.  But, it is written
+        * quite infrequently.
+        *
+        * The span_seq lock is declared along with zone->lock because it is
         * frequently read in proximity to zone->lock.  It's good to
         * give them a chance of being in the same cacheline.
+        *
+        * Write access to present_pages and managed_pages at runtime should
+        * be protected by lock_memory_hotplug()/unlock_memory_hotplug().
+        * Any reader who can't tolerate drift of present_pages and
+        * managed_pages should hold memory hotplug lock to get a stable value.
         */
-       unsigned long           spanned_pages;  /* total size, including holes */
-       unsigned long           present_pages;  /* amount of memory (excluding holes) */
+       unsigned long           spanned_pages;
+       unsigned long           present_pages;
+       unsigned long           managed_pages;
 
        /*
         * rarely used fields:
--
cgit v1.2.3
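Since the patch exports managed_pages via /proc/zoneinfo, the new accounting
can be inspected directly. A sketch that prints the three per-zone counts
(field names per this patch; the exact layout varies across kernel versions):

    /* zoneinfo_managed.c - print spanned/present/managed for each zone. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/zoneinfo", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f)) {
                    if (!strncmp(line, "Node", 4))
                            fputs(line, stdout);  /* zone header */
                    else if (strstr(line, "spanned") ||
                             strstr(line, "present") ||
                             strstr(line, "managed"))
                            fputs(line, stdout);  /* the three counts */
            }
            fclose(f);
            return 0;
    }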
From 816422ad76474fed8052b6f7b905a054d082e59a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 12 Dec 2012 13:52:36 -0800
Subject: asm-generic, mm: pgtable: consolidate zero page helpers

We have two different implementations of the is_zero_pfn() and
my_zero_pfn() helpers: for architectures with and without zero page
coloring. Let's consolidate them in <asm-generic/pgtable.h>.

Signed-off-by: Kirill A. Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/asm-generic/pgtable.h | 26 ++++++++++++++++++++++++++
 include/linux/mm.h            |  8 --------
 2 files changed, 26 insertions(+), 8 deletions(-)
(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b36ce40bd1c6..284e80831d2c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -449,6 +449,32 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
                        unsigned long size);
 #endif
 
+#ifdef __HAVE_COLOR_ZERO_PAGE
+static inline int is_zero_pfn(unsigned long pfn)
+{
+       extern unsigned long zero_pfn;
+       unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+       return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+       return page_to_pfn(ZERO_PAGE(addr));
+}
+#else
+static inline int is_zero_pfn(unsigned long pfn)
+{
+       extern unsigned long zero_pfn;
+       return pfn == zero_pfn;
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+       extern unsigned long zero_pfn;
+       return zero_pfn;
+}
+#endif
+
 #ifdef CONFIG_MMU
 
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4b80bad4a367..4af4f0b1be4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -516,14 +516,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 #endif
 
-#ifndef my_zero_pfn
-static inline unsigned long my_zero_pfn(unsigned long addr)
-{
-       extern unsigned long zero_pfn;
-       return zero_pfn;
-}
-#endif
-
 /*
  * Multiple processes may "see" the same page.  E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
--
cgit v1.2.3


From 98870901cce098bbe94d90d2c41d8d1fa8d94392 Mon Sep 17 00:00:00 2001
From: Lin Feng
Date: Wed, 12 Dec 2012 13:52:39 -0800
Subject: mm/bootmem.c: remove unused wrapper function reserve_bootmem_generic()

reserve_bootmem_generic() has no callers, so remove it.

Signed-off-by: Lin Feng
Acked-by: Johannes Weiner
Cc: Yinghai Lu
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/bootmem.h | 3 ---
 1 file changed, 3 deletions(-)
(limited to 'include')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7b74452c5317..3f778c27f825 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -137,9 +137,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
        __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
-                                  int flags);
-
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
--
cgit v1.2.3
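One subtlety in the colored is_zero_pfn() consolidated above: pfn - zero_pfn
is an unsigned subtraction, so any pfn below zero_pfn wraps to a huge value
and the single <= comparison rejects both out-of-range directions. A sketch
of the arithmetic, with made-up sample values:

    /* zero_pfn_range.c - the unsigned range-check trick, modeled. */
    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* Hypothetical: a zero area of 8 colored pages starting at pfn 0x1000,
     * so zero_page_mask = ((8 << PAGE_SHIFT) - 1) & ~0xfffUL = 0x7000. */
    static unsigned long zero_pfn = 0x1000;
    static unsigned long zero_page_mask = 0x7000;

    static int is_zero_pfn(unsigned long pfn)
    {
            /* pfn < zero_pfn wraps to a huge value, so one comparison
             * covers both bounds. */
            unsigned long offset_from_zero_pfn = pfn - zero_pfn;
            return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   is_zero_pfn(0x0fff),   /* 0: below the range */
                   is_zero_pfn(0x1007),   /* 1: last colored zero page */
                   is_zero_pfn(0x1008));  /* 0: above the range */
            return 0;
    }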