Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 281
1 file changed, 183 insertions, 98 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 901121af4e54..2b3bf6767d54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -612,6 +612,9 @@ static bool need_debug_guardpage(void)
 	if (!debug_pagealloc_enabled())
 		return false;
 
+	if (!debug_guardpage_minorder())
+		return false;
+
 	return true;
 }
@@ -620,6 +623,9 @@ static void init_debug_guardpage(void)
 	if (!debug_pagealloc_enabled())
 		return;
 
+	if (!debug_guardpage_minorder())
+		return;
+
 	_debug_guardpage_enabled = true;
 }
@@ -640,19 +646,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
 	pr_info("Setting debug_guardpage_minorder to %lu\n", res);
 	return 0;
 }
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
 
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
 				unsigned int order, int migratetype)
 {
 	struct page_ext *page_ext;
 
 	if (!debug_guardpage_enabled())
-		return;
+		return false;
+
+	if (order >= debug_guardpage_minorder())
+		return false;
 
 	page_ext = lookup_page_ext(page);
 	if (unlikely(!page_ext))
-		return;
+		return false;
 
 	__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
@@ -660,6 +669,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
 	set_page_private(page, order);
 	/* Guard pages are not available for any usage */
 	__mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+	return true;
 }
 
 static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -681,9 +692,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
 	__mod_zone_freepage_state(zone, (1 << order), migratetype);
 }
 #else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
-			unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+			unsigned int order, int migratetype) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
 				unsigned int order, int migratetype) {}
 #endif
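The hunks above change set_page_guard() from void to bool and fold the debug_guardpage_minorder() check into it, so a caller no longer needs the open-coded CONFIG_DEBUG_PAGEALLOC/minorder test. A minimal sketch of the resulting caller contract (the helper name below is hypothetical and for illustration only; the in-tree caller is the expand() hunk further down):

	/* Illustrative sketch, not part of this patch. */
	static void put_on_free_list(struct zone *zone, struct page *page,
				     struct free_area *area, unsigned int order,
				     int migratetype)
	{
		/* Returns true only when a guard page was actually installed. */
		if (set_page_guard(zone, page, order, migratetype))
			return;		/* guarded pages never enter the free lists */

		list_add(&page->lru, &area->free_list[migratetype]);
		area->nr_free++;
	}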
@@ -1398,15 +1409,18 @@ static void __init deferred_free_range(struct page *page,
 		return;
 
 	/* Free a large naturally-aligned chunk if possible */
-	if (nr_pages == MAX_ORDER_NR_PAGES &&
-	    (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+	if (nr_pages == pageblock_nr_pages &&
+	    (pfn & (pageblock_nr_pages - 1)) == 0) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, MAX_ORDER-1);
+		__free_pages_boot_core(page, pageblock_order);
 		return;
 	}
 
-	for (i = 0; i < nr_pages; i++, page++)
+	for (i = 0; i < nr_pages; i++, page++, pfn++) {
+		if ((pfn & (pageblock_nr_pages - 1)) == 0)
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		__free_pages_boot_core(page, 0);
+	}
 }
 
 /* Completion tracking for deferred_init_memmap() threads */
@@ -1474,9 +1488,9 @@ static int __init deferred_init_memmap(void *data)
 			/*
 			 * Ensure pfn_valid is checked every
-			 * MAX_ORDER_NR_PAGES for memory holes
+			 * pageblock_nr_pages for memory holes
 			 */
-			if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+			if ((pfn & (pageblock_nr_pages - 1)) == 0) {
 				if (!pfn_valid(pfn)) {
 					page = NULL;
 					goto free_range;
@@ -1489,7 +1503,7 @@ static int __init deferred_init_memmap(void *data)
 			}
 
 			/* Minimise pfn page lookups and scheduler checks */
-			if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+			if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
 				page++;
 			} else {
 				nr_pages += nr_to_free;
@@ -1525,6 +1539,9 @@ free_range:
 			free_base_page = NULL;
 			free_base_pfn = nr_to_free = 0;
 		}
+		/* Free the last block of pages to allocator */
+		nr_pages += nr_to_free;
+		deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
 
 		first_init_pfn = max(end_pfn, first_init_pfn);
 	}
@@ -1621,18 +1638,15 @@ static inline void expand(struct zone *zone, struct page *page,
 		size >>= 1;
 		VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
-		if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
-			debug_guardpage_enabled() &&
-			high < debug_guardpage_minorder()) {
-			/*
-			 * Mark as guard pages (or page), that will allow to
-			 * merge back to allocator when buddy will be freed.
-			 * Corresponding page table entries will not be touched,
-			 * pages will stay not present in virtual address space
-			 */
-			set_page_guard(zone, &page[size], high, migratetype);
+		/*
+		 * Mark as guard pages (or page), that will allow to
+		 * merge back to allocator when buddy will be freed.
+		 * Corresponding page table entries will not be touched,
+		 * pages will stay not present in virtual address space
+		 */
+		if (set_page_guard(zone, &page[size], high, migratetype))
 			continue;
-		}
+
 		list_add(&page[size].lru, &area->free_list[migratetype]);
 		area->nr_free++;
 		set_page_order(&page[size], high);
@@ -2494,9 +2508,14 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	mt = get_pageblock_migratetype(page);
 
 	if (!is_migrate_isolate(mt)) {
-		/* Obey watermarks as if the page was being allocated */
-		watermark = low_wmark_pages(zone) + (1 << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+		/*
+		 * Obey watermarks as if the page was being allocated. We can
+		 * emulate a high-order watermark check with a raised order-0
+		 * watermark, because we already know our high-order page
+		 * exists.
+		 */
+		watermark = min_wmark_pages(zone) + (1UL << order);
+		if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
 			return 0;
 
 		__mod_zone_freepage_state(zone, -(1UL << order), mt);
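The __isolate_free_page() hunk above replaces a high-order watermark check with an inflated order-0 one. A worked example with illustrative numbers (not taken from the patch): if min_wmark_pages(zone) is 1024 pages and order is 3, the check becomes zone_watermark_ok(zone, 0, 1024 + 8, 0, ALLOC_CMA), which asks "after these 8 pages are isolated, do free pages still sit above the min watermark?". That is sufficient because the order-3 page is already known to exist in the free lists. Compared with the old code it also drops from the low watermark to the min watermark and passes ALLOC_CMA, so isolation is less likely to fail spuriously.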
@@ -2965,9 +2984,11 @@ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
 		DEFAULT_RATELIMIT_BURST);
 
-void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
+void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
 {
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
+	struct va_format vaf;
+	va_list args;
 
 	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
 	    debug_guardpage_minorder() > 0)
@@ -2985,22 +3006,16 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
 	if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
-	if (fmt) {
-		struct va_format vaf;
-		va_list args;
+	pr_warn("%s: ", current->comm);
 
-		va_start(args, fmt);
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	pr_cont("%pV", &vaf);
+	va_end(args);
 
-		vaf.fmt = fmt;
-		vaf.va = &args;
+	pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
 
-		pr_warn("%pV", &vaf);
-
-		va_end(args);
-	}
-
-	pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
-		current->comm, order, gfp_mask, &gfp_mask);
 	dump_stack();
 	if (!should_suppress_show_mem())
 		show_mem(filter);
@@ -3142,6 +3157,65 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	return NULL;
 }
 
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+		     enum compact_result compact_result,
+		     enum compact_priority *compact_priority,
+		     int *compaction_retries)
+{
+	int max_retries = MAX_COMPACT_RETRIES;
+	int min_priority;
+
+	if (!order)
+		return false;
+
+	if (compaction_made_progress(compact_result))
+		(*compaction_retries)++;
+
+	/*
+	 * compaction considers all the zone as desperately out of memory
+	 * so it doesn't really make much sense to retry except when the
+	 * failure could be caused by insufficient priority
+	 */
+	if (compaction_failed(compact_result))
+		goto check_priority;
+
+	/*
+	 * make sure the compaction wasn't deferred or didn't bail out early
+	 * due to locks contention before we declare that we should give up.
+	 * But do not retry if the given zonelist is not suitable for
+	 * compaction.
+	 */
+	if (compaction_withdrawn(compact_result))
+		return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+	/*
+	 * !costly requests are much more important than __GFP_REPEAT
+	 * costly ones because they are de facto nofail and invoke OOM
+	 * killer to move on while costly can fail and users are ready
+	 * to cope with that. 1/4 retries is rather arbitrary but we
+	 * would need much more detailed feedback from compaction to
+	 * make a better decision.
+	 */
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
+		max_retries /= 4;
+	if (*compaction_retries <= max_retries)
+		return true;
+
+	/*
+	 * Make sure there are attempts at the highest priority if we exhausted
+	 * all retries or failed at the lower priorities.
+	 */
+check_priority:
+	min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+			MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+	if (*compact_priority > min_priority) {
+		(*compact_priority)--;
+		*compaction_retries = 0;
+		return true;
+	}
+	return false;
+}
 
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
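A worked example of the retry budget in the new should_compact_retry(), under the assumption that MAX_COMPACT_RETRIES keeps its in-tree value of 16: a costly request (order > PAGE_ALLOC_COSTLY_ORDER) gets max_retries = 16 / 4 = 4 compaction retries at a given priority, while a !costly request keeps all 16. Once that budget is exhausted, or compaction reports outright failure, the priority is raised one step (bounded by MIN_COMPACT_COSTLY_PRIORITY for costly orders, MIN_COMPACT_PRIORITY otherwise) and the retry counter restarts at 0, so the total number of attempts stays bounded by the per-priority budget times the number of priority levels.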
@@ -3152,13 +3226,11 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	return NULL;
 }
 
-#endif /* CONFIG_COMPACTION */
-
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
 		     enum compact_result compact_result,
 		     enum compact_priority *compact_priority,
-		     int compaction_retries)
+		     int *compaction_retries)
 {
 	struct zone *zone;
 	struct zoneref *z;
@@ -3180,6 +3252,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
 	}
 	return false;
 }
+#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -3330,16 +3403,26 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 static inline bool
 should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		     struct alloc_context *ac, int alloc_flags,
-		     bool did_some_progress, int no_progress_loops)
+		     bool did_some_progress, int *no_progress_loops)
 {
 	struct zone *zone;
 	struct zoneref *z;
 
 	/*
+	 * Costly allocations might have made a progress but this doesn't mean
+	 * their order will become available due to high fragmentation so
+	 * always increment the no progress counter for them
+	 */
+	if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+		*no_progress_loops = 0;
+	else
+		(*no_progress_loops)++;
+
+	/*
 	 * Make sure we converge to OOM if we cannot make any progress
 	 * several times in the row.
 	 */
-	if (no_progress_loops > MAX_RECLAIM_RETRIES)
+	if (*no_progress_loops > MAX_RECLAIM_RETRIES)
 		return false;
 
 	/*
@@ -3354,7 +3437,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		unsigned long reclaimable;
 
 		available = reclaimable = zone_reclaimable_pages(zone);
-		available -= DIV_ROUND_UP(no_progress_loops * available,
+		available -= DIV_ROUND_UP((*no_progress_loops) * available,
 					  MAX_RECLAIM_RETRIES);
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
@@ -3415,6 +3498,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	enum compact_result compact_result;
 	int compaction_retries = 0;
 	int no_progress_loops = 0;
+	unsigned long alloc_start = jiffies;
+	unsigned int stall_timeout = 10 * HZ;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3559,9 +3644,6 @@ retry:
 	if (page)
 		goto got_pg;
 
-	if (order && compaction_made_progress(compact_result))
-		compaction_retries++;
-
 	/* Do not loop if specifically requested */
 	if (gfp_mask & __GFP_NORETRY)
 		goto nopage;
@@ -3573,18 +3655,16 @@ retry:
 	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
 		goto nopage;
 
-	/*
-	 * Costly allocations might have made a progress but this doesn't mean
-	 * their order will become available due to high fragmentation so
-	 * always increment the no progress counter for them
-	 */
-	if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
-		no_progress_loops = 0;
-	else
-		no_progress_loops++;
+	/* Make sure we know about allocations which stall for too long */
+	if (time_after(jiffies, alloc_start + stall_timeout)) {
+		warn_alloc(gfp_mask,
+			"page allocation stalls for %ums, order:%u\n",
+			jiffies_to_msecs(jiffies-alloc_start), order);
+		stall_timeout += 10 * HZ;
+	}
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
-				 did_some_progress > 0, no_progress_loops))
+				 did_some_progress > 0, &no_progress_loops))
		goto retry;
 
 	/*
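The stall warning added above fires once the slowpath has looped for 10 seconds and then backs off by a further 10 seconds per warning, so a long stall produces a handful of messages rather than a flood. A minimal sketch of the same pattern outside the allocator (retry_operation() is a hypothetical stand-in for the retrying work, not a kernel API):

	unsigned long start = jiffies;
	unsigned int stall_timeout = 10 * HZ;

	while (!retry_operation()) {		/* hypothetical retrying work */
		if (time_after(jiffies, start + stall_timeout)) {
			pr_warn("operation stalls for %ums\n",
				jiffies_to_msecs(jiffies - start));
			/* back off: the next warning needs another 10s of stalling */
			stall_timeout += 10 * HZ;
		}
	}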
@@ -3596,7 +3676,7 @@ retry:
 	if (did_some_progress > 0 &&
 			should_compact_retry(ac, order, alloc_flags,
 				compact_result, &compact_priority,
-				compaction_retries))
+				&compaction_retries))
 		goto retry;
 
 	/* Reclaim has failed us, start killing things */
@@ -3611,7 +3691,8 @@ retry:
 	}
 
 nopage:
-	warn_alloc_failed(gfp_mask, order, NULL);
+	warn_alloc(gfp_mask,
+			"page allocation failure: order:%u", order);
 got_pg:
 	return page;
 }
@@ -4560,7 +4641,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 	int j;
 	struct zonelist *zonelist;
 
-	zonelist = &pgdat->node_zonelists[0];
+	zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
 	for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
 		;
 	j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4576,7 +4657,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
 	int j;
 	struct zonelist *zonelist;
 
-	zonelist = &pgdat->node_zonelists[1];
+	zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
 	j = build_zonelists_node(pgdat, zonelist, 0);
 	zonelist->_zonerefs[j].zone = NULL;
 	zonelist->_zonerefs[j].zone_idx = 0;
@@ -4597,7 +4678,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 	struct zone *z;
 	struct zonelist *zonelist;
 
-	zonelist = &pgdat->node_zonelists[0];
+	zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
 	pos = 0;
 	for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
 		for (j = 0; j < nr_nodes; j++) {
@@ -4732,7 +4813,7 @@ static void build_zonelists(pg_data_t *pgdat)
 
 	local_node = pgdat->node_id;
 
-	zonelist = &pgdat->node_zonelists[0];
+	zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
 	j = build_zonelists_node(pgdat, zonelist, 0);
 
 	/*
@@ -5005,15 +5086,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 		/*
-		 * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
-		 * from zone_movable_pfn[nid] to end of each node should be
-		 * ZONE_MOVABLE not ZONE_NORMAL. skip it.
-		 */
-		if (!mirrored_kernelcore && zone_movable_pfn[nid])
-			if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
-				continue;
-
-		/*
 		 * Check given memblock attribute by firmware which can affect
 		 * kernel memory layout. If zone==ZONE_MOVABLE but memory is
 		 * mirrored, it's an overlapped memmap init. skip it.
@@ -5456,6 +5528,12 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
 		*zone_end_pfn = min(node_end_pfn,
 			arch_zone_highest_possible_pfn[movable_zone]);
 
+	/* Adjust for ZONE_MOVABLE starting within this range */
+	} else if (!mirrored_kernelcore &&
+		*zone_start_pfn < zone_movable_pfn[nid] &&
+		*zone_end_pfn > zone_movable_pfn[nid]) {
+		*zone_end_pfn = zone_movable_pfn[nid];
+
 	/* Check if this whole range is within ZONE_MOVABLE */
 	} else if (*zone_start_pfn >= zone_movable_pfn[nid])
 		*zone_start_pfn = *zone_end_pfn;
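A worked example of the new middle branch in adjust_zone_range_for_zone_movable(), with illustrative PFNs that are not from the patch: if zone_movable_pfn[nid] is 0x40000 and a ZONE_NORMAL span runs from *zone_start_pfn = 0x20000 to *zone_end_pfn = 0x60000, the range straddles the movable boundary, so *zone_end_pfn is clamped to 0x40000 and everything above it is left to ZONE_MOVABLE. Together with the memmap_init_zone() hunk above, the zone span is adjusted up front instead of skipping individual pfns during memmap init.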
@@ -5559,28 +5637,23 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	 * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
 	 * and vice versa.
 	 */
-	if (zone_movable_pfn[nid]) {
-		if (mirrored_kernelcore) {
-			unsigned long start_pfn, end_pfn;
-			struct memblock_region *r;
-
-			for_each_memblock(memory, r) {
-				start_pfn = clamp(memblock_region_memory_base_pfn(r),
-						  zone_start_pfn, zone_end_pfn);
-				end_pfn = clamp(memblock_region_memory_end_pfn(r),
-						zone_start_pfn, zone_end_pfn);
-
-				if (zone_type == ZONE_MOVABLE &&
-				    memblock_is_mirror(r))
-					nr_absent += end_pfn - start_pfn;
-
-				if (zone_type == ZONE_NORMAL &&
-				    !memblock_is_mirror(r))
-					nr_absent += end_pfn - start_pfn;
-			}
-		} else {
-			if (zone_type == ZONE_NORMAL)
-				nr_absent += node_end_pfn - zone_movable_pfn[nid];
+	if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+		unsigned long start_pfn, end_pfn;
+		struct memblock_region *r;
+
+		for_each_memblock(memory, r) {
+			start_pfn = clamp(memblock_region_memory_base_pfn(r),
+					  zone_start_pfn, zone_end_pfn);
+			end_pfn = clamp(memblock_region_memory_end_pfn(r),
+					zone_start_pfn, zone_end_pfn);
+
+			if (zone_type == ZONE_MOVABLE &&
+			    memblock_is_mirror(r))
+				nr_absent += end_pfn - start_pfn;
+
+			if (zone_type == ZONE_NORMAL &&
+			    !memblock_is_mirror(r))
+				nr_absent += end_pfn - start_pfn;
 		}
 	}
@@ -6934,6 +7007,17 @@ static int __init set_hashdist(char *str)
 __setup("hashdist=", set_hashdist);
 #endif
 
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Returns the number of pages that arch has reserved but
+ * is not known to alloc_large_system_hash().
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+	return 0;
+}
+#endif
+
 /*
  * allocate a large system hash table from bootmem
  * - it is assumed that the hash table must contain an exact power-of-2
@@ -6958,6 +7042,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	if (!numentries) {
 		/* round applicable memory size up to nearest megabyte */
 		numentries = nr_kernel_pages;
+		numentries -= arch_reserved_kernel_pages();
 
 		/* It isn't necessary when PAGE_SIZE >= 1MB */
 		if (PAGE_SHIFT < 20)
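The #ifndef-guarded default above returns 0, so the hook only changes behaviour when an architecture defines __HAVE_ARCH_RESERVED_KERNEL_PAGES and supplies its own count. A minimal sketch of such an override (the variable name and the header placement are assumptions for illustration; this patch does not add any arch implementation):

	/* in an arch header (assumed placement) */
	#define __HAVE_ARCH_RESERVED_KERNEL_PAGES

	/* in arch code: report pages reserved away from the buddy allocator */
	unsigned long __init arch_reserved_kernel_pages(void)
	{
		return my_arch_reserved_pages;	/* hypothetical per-arch count */
	}

With such an override in place, alloc_large_system_hash() sizes its boot-time hash tables from nr_kernel_pages minus the reserved pages rather than from the full count.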