diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 738 |
1 files changed, 205 insertions, 533 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 33dc256033b5..eeb3bc9d1d36 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -53,24 +53,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/vmscan.h> -/* - * reclaim_mode determines how the inactive list is shrunk - * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages - * RECLAIM_MODE_ASYNC: Do not block - * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback - * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference - * page from the LRU and reclaim all pages within a - * naturally aligned range - * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of - * order-0 pages and then compact the zone - */ -typedef unsigned __bitwise__ reclaim_mode_t; -#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) -#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u) -#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u) -#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u) -#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) - struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; @@ -96,11 +78,8 @@ struct scan_control { int order; - /* - * Intend to reclaim enough continuous memory rather than reclaim - * enough amount of memory. i.e, mode for high order allocation. - */ - reclaim_mode_t reclaim_mode; + /* Scan (total_size >> priority) pages at once */ + int priority; /* * The memory cgroup that hit its limit and as a result is the @@ -115,11 +94,6 @@ struct scan_control { nodemask_t *nodemask; }; -struct mem_cgroup_zone { - struct mem_cgroup *mem_cgroup; - struct zone *zone; -}; - #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return !mz->mem_cgroup; -} #else static bool global_reclaim(struct scan_control *sc) { return true; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return true; -} #endif -static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) -{ - if (!scanning_global_lru(mz)) - return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); - - return &mz->zone->reclaim_stat; -} - -static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, - enum lru_list lru) +static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) { - if (!scanning_global_lru(mz)) - return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, - zone_to_nid(mz->zone), - zone_idx(mz->zone), - BIT(lru)); + if (!mem_cgroup_disabled()) + return mem_cgroup_get_lru_size(lruvec, lru); - return zone_page_state(mz->zone, NR_LRU_BASE + lru); + return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); } - /* * Add a shrinker callback to be called from the vm */ @@ -364,39 +315,6 @@ out: return ret; } -static void set_reclaim_mode(int priority, struct scan_control *sc, - bool sync) -{ - reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC; - - /* - * Initially assume we are entering either lumpy reclaim or - * reclaim/compaction.Depending on the order, we will either set the - * sync mode or just reclaim order-0 pages later. - */ - if (COMPACTION_BUILD) - sc->reclaim_mode = RECLAIM_MODE_COMPACTION; - else - sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM; - - /* - * Avoid using lumpy reclaim or reclaim/compaction if possible by - * restricting when its set to either costly allocations or when - * under memory pressure - */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - sc->reclaim_mode |= syncmode; - else if (sc->order && priority < DEF_PRIORITY - 2) - sc->reclaim_mode |= syncmode; - else - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; -} - -static void reset_reclaim_mode(struct scan_control *sc) -{ - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; -} - static inline int is_page_cache_freeable(struct page *page) { /* @@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi, return 1; if (bdi == current->backing_dev_info) return 1; - - /* lumpy reclaim for hugepage often need a lot of write */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - return 1; return 0; } @@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, /* synchronous write or broken a_ops? */ ClearPageReclaim(page); } - trace_mm_vmscan_writepage(page, - trace_reclaim_flags(page, sc->reclaim_mode)); + trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); inc_zone_page_state(page, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } @@ -701,19 +614,15 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, - struct mem_cgroup_zone *mz, struct scan_control *sc) { int referenced_ptes, referenced_page; unsigned long vm_flags; - referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); + referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, + &vm_flags); referenced_page = TestClearPageReferenced(page); - /* Lumpy reclaim - ignore references */ - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - return PAGEREF_RECLAIM; - /* * Mlock lost the isolation race with us. Let try_to_unmap() * move the page to the unevictable list. @@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageAnon(page)) + if (PageSwapBacked(page)) return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table @@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct mem_cgroup_zone *mz, + struct zone *zone, struct scan_control *sc, - int priority, unsigned long *ret_nr_dirty, unsigned long *ret_nr_writeback) { @@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != mz->zone); + VM_BUG_ON(page_zone(page) != zone); sc->nr_scanned++; @@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageWriteback(page)) { nr_writeback++; - /* - * Synchronous reclaim cannot queue pages for - * writeback due to the possibility of stack overflow - * but if it encounters a page under writeback, wait - * for the IO to complete. - */ - if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && - may_enter_fs) - wait_on_page_writeback(page); - else { - unlock_page(page); - goto keep_lumpy; - } + unlock_page(page); + goto keep; } - references = page_check_references(page, mz, sc); + references = page_check_references(page, sc); switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; @@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * unless under significant pressure. */ if (page_is_file_cache(page) && - (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { + (!current_is_kswapd() || + sc->priority >= DEF_PRIORITY - 2)) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto activate_locked; case PAGE_SUCCESS: if (PageWriteback(page)) - goto keep_lumpy; + goto keep; if (PageDirty(page)) goto keep; @@ -994,7 +892,6 @@ cull_mlocked: try_to_free_swap(page); unlock_page(page); putback_lru_page(page); - reset_reclaim_mode(sc); continue; activate_locked: @@ -1007,8 +904,6 @@ activate_locked: keep_locked: unlock_page(page); keep: - reset_reclaim_mode(sc); -keep_lumpy: list_add(&page->lru, &ret_pages); VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } @@ -1020,7 +915,7 @@ keep_lumpy: * will encounter the same problem */ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) - zone_set_flag(mz->zone, ZONE_CONGESTED); + zone_set_flag(zone, ZONE_CONGESTED); free_hot_cold_page_list(&free_pages, 1); @@ -1041,34 +936,15 @@ keep_lumpy: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode) { - bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; - all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == - (ISOLATE_ACTIVE|ISOLATE_INACTIVE); - - /* - * When checking the active state, we need to be sure we are - * dealing with comparible boolean values. Take the logical not - * of each. - */ - if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) - return ret; - - if (!all_lru_mode && !!page_is_file_cache(page) != file) - return ret; - - /* - * When this function is being called for lumpy reclaim, we - * initially look into all LRU pages, active, inactive and - * unevictable; only give shrink_page_list evictable pages. - */ + /* Do not give back unevictable pages for compaction */ if (PageUnevictable(page)) return ret; @@ -1135,54 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) * Appropriate locks must be held before calling this function. * * @nr_to_scan: The number of pages to look through on the list. - * @mz: The mem_cgroup_zone to pull pages from. + * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session * @mode: One of the LRU isolation modes - * @active: True [1] if isolating active pages - * @file: True [1] if isolating file [!anon] pages + * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, struct list_head *dst, + struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, - isolate_mode_t mode, int active, int file) + isolate_mode_t mode, enum lru_list lru) { - struct lruvec *lruvec; - struct list_head *src; + struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; - unsigned long nr_lumpy_taken = 0; - unsigned long nr_lumpy_dirty = 0; - unsigned long nr_lumpy_failed = 0; unsigned long scan; - int lru = LRU_BASE; - - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); - if (active) - lru += LRU_ACTIVE; - if (file) - lru += LRU_FILE; - src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; - unsigned long pfn; - unsigned long end_pfn; - unsigned long page_pfn; - int zone_id; + int nr_pages; page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); VM_BUG_ON(!PageLRU(page)); - switch (__isolate_lru_page(page, mode, file)) { + switch (__isolate_lru_page(page, mode)) { case 0: - mem_cgroup_lru_del(page); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_move(&page->lru, dst); - nr_taken += hpage_nr_pages(page); + nr_taken += nr_pages; break; case -EBUSY: @@ -1193,93 +1054,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, default: BUG(); } - - if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)) - continue; - - /* - * Attempt to take all pages in the order aligned region - * surrounding the tag page. Only take those pages of - * the same active state as that tag page. We may safely - * round the target page pfn down to the requested order - * as the mem_map is guaranteed valid out to MAX_ORDER, - * where that page is in a different zone we will detect - * it from its zone id and abort this block scan. - */ - zone_id = page_zone_id(page); - page_pfn = page_to_pfn(page); - pfn = page_pfn & ~((1 << sc->order) - 1); - end_pfn = pfn + (1 << sc->order); - for (; pfn < end_pfn; pfn++) { - struct page *cursor_page; - - /* The target page is in the block, ignore it. */ - if (unlikely(pfn == page_pfn)) - continue; - - /* Avoid holes within the zone. */ - if (unlikely(!pfn_valid_within(pfn))) - break; - - cursor_page = pfn_to_page(pfn); - - /* Check that we have not crossed a zone boundary. */ - if (unlikely(page_zone_id(cursor_page) != zone_id)) - break; - - /* - * If we don't have enough swap space, reclaiming of - * anon page which don't already have a swap slot is - * pointless. - */ - if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && - !PageSwapCache(cursor_page)) - break; - - if (__isolate_lru_page(cursor_page, mode, file) == 0) { - unsigned int isolated_pages; - - mem_cgroup_lru_del(cursor_page); - list_move(&cursor_page->lru, dst); - isolated_pages = hpage_nr_pages(cursor_page); - nr_taken += isolated_pages; - nr_lumpy_taken += isolated_pages; - if (PageDirty(cursor_page)) - nr_lumpy_dirty += isolated_pages; - scan++; - pfn += isolated_pages - 1; - } else { - /* - * Check if the page is freed already. - * - * We can't use page_count() as that - * requires compound_head and we don't - * have a pin on the page here. If a - * page is tail, we may or may not - * have isolated the head, so assume - * it's not free, it'd be tricky to - * track the head status without a - * page pin. - */ - if (!PageTail(cursor_page) && - !atomic_read(&cursor_page->_count)) - continue; - break; - } - } - - /* If we break out of the loop above, lumpy reclaim failed */ - if (pfn < end_pfn) - nr_lumpy_failed++; } *nr_scanned = scan; - - trace_mm_vmscan_lru_isolate(sc->order, - nr_to_scan, scan, - nr_taken, - nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, - mode, file); + trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, + nr_taken, mode, is_file_lru(lru)); return nr_taken; } @@ -1316,15 +1095,16 @@ int isolate_lru_page(struct page *page) if (PageLRU(page)) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); if (PageLRU(page)) { int lru = page_lru(page); - ret = 0; get_page(page); ClearPageLRU(page); - - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); + ret = 0; } spin_unlock_irq(&zone->lru_lock); } @@ -1357,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file, } static noinline_for_stack void -putback_inactive_pages(struct mem_cgroup_zone *mz, - struct list_head *page_list) +putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); - struct zone *zone = mz->zone; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; + struct zone *zone = lruvec_zone(lruvec); LIST_HEAD(pages_to_free); /* @@ -1379,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); continue; } + + lruvec = mem_cgroup_page_lruvec(page, zone); + SetPageLRU(page); lru = page_lru(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); + if (is_active_lru(lru)) { int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); @@ -1390,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, list_splice(&pages_to_free, page_list); } -static noinline_for_stack void -update_isolated_counts(struct mem_cgroup_zone *mz, - struct list_head *page_list, - unsigned long *nr_anon, - unsigned long *nr_file) -{ - struct zone *zone = mz->zone; - unsigned int count[NR_LRU_LISTS] = { 0, }; - unsigned long nr_active = 0; - struct page *page; - int lru; - - /* - * Count pages and clear active flags - */ - list_for_each_entry(page, page_list, lru) { - int numpages = hpage_nr_pages(page); - lru = page_lru_base_type(page); - if (PageActive(page)) { - lru += LRU_ACTIVE; - ClearPageActive(page); - nr_active += numpages; - } - count[lru] += numpages; - } - - preempt_disable(); - __count_vm_events(PGDEACTIVATE, nr_active); - - __mod_zone_page_state(zone, NR_ACTIVE_FILE, - -count[LRU_ACTIVE_FILE]); - __mod_zone_page_state(zone, NR_INACTIVE_FILE, - -count[LRU_INACTIVE_FILE]); - __mod_zone_page_state(zone, NR_ACTIVE_ANON, - -count[LRU_ACTIVE_ANON]); - __mod_zone_page_state(zone, NR_INACTIVE_ANON, - -count[LRU_INACTIVE_ANON]); - - *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - - __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); - preempt_enable(); -} - -/* - * Returns true if a direct reclaim should wait on pages under writeback. - * - * If we are direct reclaiming for contiguous pages and we do not reclaim - * everything in the list, try again and wait for writeback IO to complete. - * This will stall high-order allocations noticeably. Only do that when really - * need to free the pages under high memory pressure. - */ -static inline bool should_reclaim_stall(unsigned long nr_taken, - unsigned long nr_freed, - int priority, - struct scan_control *sc) -{ - int lumpy_stall_priority; - - /* kswapd should not stall on sync IO */ - if (current_is_kswapd()) - return false; - - /* Only stall on lumpy reclaim */ - if (sc->reclaim_mode & RECLAIM_MODE_SINGLE) - return false; - - /* If we have reclaimed everything on the isolated list, no stall */ - if (nr_freed == nr_taken) - return false; - - /* - * For high-order allocations, there are two stall thresholds. - * High-cost allocations stall immediately where as lower - * order allocations such as stacks require the scanning - * priority to be much higher before stalling. - */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - lumpy_stall_priority = DEF_PRIORITY; - else - lumpy_stall_priority = DEF_PRIORITY / 3; - - return priority <= lumpy_stall_priority; -} - /* * shrink_inactive_list() is a helper for shrink_zone(). It returns the number * of reclaimed pages */ static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority, int file) +shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_anon; - unsigned long nr_file; unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; - isolate_mode_t isolate_mode = ISOLATE_INACTIVE; - struct zone *zone = mz->zone; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, return SWAP_CLUSTER_MAX; } - set_reclaim_mode(priority, sc, false); - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - isolate_mode |= ISOLATE_ACTIVE; - lru_add_drain(); if (!sc->may_unmap) @@ -1535,38 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, - sc, isolate_mode, 0, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, + &nr_scanned, sc, isolate_mode, lru); + + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) - __count_zone_vm_events(PGSCAN_KSWAPD, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else - __count_zone_vm_events(PGSCAN_DIRECT, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } spin_unlock_irq(&zone->lru_lock); if (nr_taken == 0) return 0; - update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - - nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, + nr_reclaimed = shrink_page_list(&page_list, zone, sc, &nr_dirty, &nr_writeback); - /* Check if we should syncronously wait for writeback */ - if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { - set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, mz, sc, - priority, &nr_dirty, &nr_writeback); - } - spin_lock_irq(&zone->lru_lock); - reclaim_stat->recent_scanned[0] += nr_anon; - reclaim_stat->recent_scanned[1] += nr_file; + reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { if (current_is_kswapd()) @@ -1577,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_reclaimed); } - putback_inactive_pages(mz, &page_list); + putback_inactive_pages(lruvec, &page_list); - __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1609,14 +1291,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any * isolated page is PageWriteback */ - if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) + if (nr_writeback && nr_writeback >= + (nr_taken >> (DEF_PRIORITY - sc->priority))) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, - priority, - trace_shrink_flags(file, sc->reclaim_mode)); + sc->priority, + trace_shrink_flags(file)); return nr_reclaimed; } @@ -1638,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * But we had to alter page->flags anyway. */ -static void move_active_pages_to_lru(struct zone *zone, +static void move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *list, struct list_head *pages_to_free, enum lru_list lru) { + struct zone *zone = lruvec_zone(lruvec); unsigned long pgmoved = 0; struct page *page; + int nr_pages; while (!list_empty(list)) { - struct lruvec *lruvec; - page = lru_to_page(list); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_move(&page->lru, &lruvec->lists[lru]); - pgmoved += hpage_nr_pages(page); + pgmoved += nr_pages; if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1677,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone, } static void shrink_active_list(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, + struct lruvec *lruvec, struct scan_control *sc, - int priority, int file) + enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; @@ -1688,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; unsigned long nr_rotated = 0; - isolate_mode_t isolate_mode = ISOLATE_ACTIVE; - struct zone *zone = mz->zone; + isolate_mode_t isolate_mode = 0; + int file = is_file_lru(lru); + struct zone *zone = lruvec_zone(lruvec); lru_add_drain(); - reset_reclaim_mode(sc); - if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; if (!sc->may_writepage) @@ -1704,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, - isolate_mode, 1, file); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, + &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) zone->pages_scanned += nr_scanned; reclaim_stat->recent_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, nr_scanned); - if (file) - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); - else - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1737,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan, } } - if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { + if (page_referenced(page, 0, sc->target_mem_cgroup, + &vm_flags)) { nr_rotated += hpage_nr_pages(page); /* * Identify referenced, file-backed active pages and @@ -1770,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, &l_hold, - LRU_ACTIVE + file * LRU_FILE); - move_active_pages_to_lru(zone, &l_inactive, &l_hold, - LRU_BASE + file * LRU_FILE); + move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); + move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1796,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone) /** * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * @sc: scan control of this context + * @lruvec: LRU vector to check * * Returns true if the zone does not have enough inactive anon pages, * meaning some active anon pages need to be deactivated. */ -static int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static int inactive_anon_is_low(struct lruvec *lruvec) { /* * If we don't have swap space, anonymous page deactivation @@ -1811,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) if (!total_swap_pages) return 0; - if (!scanning_global_lru(mz)) - return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, - mz->zone); + if (!mem_cgroup_disabled()) + return mem_cgroup_inactive_anon_is_low(lruvec); - return inactive_anon_is_low_global(mz->zone); + return inactive_anon_is_low_global(lruvec_zone(lruvec)); } #else -static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static inline int inactive_anon_is_low(struct lruvec *lruvec) { return 0; } @@ -1836,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone) /** * inactive_file_is_low - check if file pages need to be deactivated - * @mz: memory cgroup and zone to check + * @lruvec: LRU vector to check * * When the system is doing streaming IO, memory pressure here * ensures that active file pages get deactivated, until more @@ -1848,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone) * This uses a different ratio than the anonymous pages, because * the page cache uses a use-once replacement algorithm. */ -static int inactive_file_is_low(struct mem_cgroup_zone *mz) +static int inactive_file_is_low(struct lruvec *lruvec) { - if (!scanning_global_lru(mz)) - return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, - mz->zone); + if (!mem_cgroup_disabled()) + return mem_cgroup_inactive_file_is_low(lruvec); - return inactive_file_is_low_global(mz->zone); + return inactive_file_is_low_global(lruvec_zone(lruvec)); } -static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) +static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) { - if (file) - return inactive_file_is_low(mz); + if (is_file_lru(lru)) + return inactive_file_is_low(lruvec); else - return inactive_anon_is_low(mz); + return inactive_anon_is_low(lruvec); } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority) + struct lruvec *lruvec, struct scan_control *sc) { - int file = is_file_lru(lru); - if (is_active_lru(lru)) { - if (inactive_list_is_low(mz, file)) - shrink_active_list(nr_to_scan, mz, sc, priority, file); + if (inactive_list_is_low(lruvec, lru)) + shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); } -static int vmscan_swappiness(struct mem_cgroup_zone *mz, - struct scan_control *sc) +static int vmscan_swappiness(struct scan_control *sc) { if (global_reclaim(sc)) return vm_swappiness; - return mem_cgroup_swappiness(mz->mem_cgroup); + return mem_cgroup_swappiness(sc->target_mem_cgroup); } /* @@ -1896,17 +1569,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz, * * nr[0] = anon pages to scan; nr[1] = file pages to scan */ -static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, - unsigned long *nr, int priority) +static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, + unsigned long *nr) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2], denominator; enum lru_list lru; int noswap = 0; bool force_scan = false; + struct zone *zone = lruvec_zone(lruvec); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1918,7 +1592,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * latencies, so it's better to scan a minimum amount there as * well. */ - if (current_is_kswapd() && mz->zone->all_unreclaimable) + if (current_is_kswapd() && zone->all_unreclaimable) force_scan = true; if (!global_reclaim(sc)) force_scan = true; @@ -1932,16 +1606,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { - free = zone_page_state(mz->zone, NR_FREE_PAGES); + free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= high_wmark_pages(mz->zone))) { + if (unlikely(file + free <= high_wmark_pages(zone))) { fraction[0] = 1; fraction[1] = 0; denominator = 1; @@ -1953,8 +1627,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ - anon_prio = vmscan_swappiness(mz, sc); - file_prio = 200 - vmscan_swappiness(mz, sc); + anon_prio = vmscan_swappiness(sc); + file_prio = 200 - anon_prio; /* * OK, so we have swap space and a fair amount of page cache @@ -1967,7 +1641,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * * anon in [0], file in [1] */ - spin_lock_irq(&mz->zone->lru_lock); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -1983,12 +1657,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * proportional to the fraction of recently scanned pages on * each list that were recently referenced and in active use. */ - ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); + ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); ap /= reclaim_stat->recent_rotated[0] + 1; - fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); + fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&mz->zone->lru_lock); + spin_unlock_irq(&zone->lru_lock); fraction[0] = ap; fraction[1] = fp; @@ -1998,9 +1672,9 @@ out: int file = is_file_lru(lru); unsigned long scan; - scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap) { - scan >>= priority; + scan = get_lru_size(lruvec, lru); + if (sc->priority || noswap || !vmscan_swappiness(sc)) { + scan >>= sc->priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); @@ -2009,14 +1683,25 @@ out: } } +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(struct scan_control *sc) +{ + if (COMPACTION_BUILD && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + sc->priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + /* - * Reclaim/compaction depends on a number of pages being freed. To avoid - * disruption to the system, a small number of order-0 pages continue to be - * rotated and reclaimed in the normal fashion. However, by the time we get - * back to the allocator and call try_to_compact_zone(), we ensure that - * there are enough free pages for it to be likely successful + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_zone() that it will have enough free pages to succeed. + * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, +static inline bool should_continue_reclaim(struct lruvec *lruvec, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) @@ -2025,7 +1710,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, unsigned long inactive_lru_pages; /* If not in reclaim/compaction mode, stop */ - if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) + if (!in_reclaim_compaction(sc)) return false; /* Consider stopping depending on scan and reclaim activity */ @@ -2056,15 +1741,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); + inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(mz->zone, sc->order)) { + switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -2076,8 +1761,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, - struct scan_control *sc) +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -2089,7 +1773,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(mz, sc, nr, priority); + get_scan_count(lruvec, sc, nr); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -2101,7 +1785,7 @@ restart: nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - mz, sc, priority); + lruvec, sc); } } /* @@ -2112,7 +1796,8 @@ restart: * with multiple processes reclaiming pages, the total * freeing target can get unreasonably large. */ - if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) + if (nr_reclaimed >= nr_to_reclaim && + sc->priority < DEF_PRIORITY) break; } blk_finish_plug(&plug); @@ -2122,35 +1807,33 @@ restart: * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_anon_is_low(mz)) - shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(mz, nr_reclaimed, - sc->nr_scanned - nr_scanned, sc)) + if (should_continue_reclaim(lruvec, nr_reclaimed, + sc->nr_scanned - nr_scanned, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); } -static void shrink_zone(int priority, struct zone *zone, - struct scan_control *sc) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .zone = zone, - .priority = priority, + .priority = sc->priority, }; struct mem_cgroup *memcg; memcg = mem_cgroup_iter(root, NULL, &reclaim); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + + shrink_lruvec(lruvec, sc); - shrink_mem_cgroup_zone(priority, &mz, sc); /* * Limit reclaim has historically picked one memcg and * scanned it with decreasing priority levels until @@ -2226,8 +1909,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) * the caller that it should consider retrying the allocation instead of * further reclaim. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, - struct scan_control *sc) +static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; @@ -2254,7 +1936,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, if (global_reclaim(sc)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc->priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ if (COMPACTION_BUILD) { /* @@ -2286,7 +1969,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, /* need some check for avoid more shrink_zone() */ } - shrink_zone(priority, zone, sc); + shrink_zone(zone, sc); } return aborted_reclaim; @@ -2337,7 +2020,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc, struct shrink_control *shrink) { - int priority; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -2350,11 +2032,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (global_reclaim(sc)) count_vm_event(ALLOCSTALL); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { sc->nr_scanned = 0; - if (!priority) - disable_swap_token(sc->target_mem_cgroup); - aborted_reclaim = shrink_zones(priority, zonelist, sc); + aborted_reclaim = shrink_zones(zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2396,7 +2076,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (!sc->hibernation_mode && sc->nr_scanned && - priority < DEF_PRIORITY - 2) { + sc->priority < DEF_PRIORITY - 2) { struct zone *preferred_zone; first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), @@ -2404,7 +2084,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, &preferred_zone); wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); } - } + } while (--sc->priority >= 0); out: delayacct_freepages_end(); @@ -2442,6 +2122,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_unmap = 1, .may_swap = 1, .order = order, + .priority = DEF_PRIORITY, .target_mem_cgroup = NULL, .nodemask = nodemask, }; @@ -2474,17 +2155,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, .order = 0, + .priority = 0, .target_mem_cgroup = memcg, }; - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); - trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, + trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, sc.may_writepage, sc.gfp_mask); @@ -2495,7 +2174,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_mem_cgroup_zone(0, &mz, &sc); + shrink_lruvec(lruvec, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2516,6 +2195,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_swap = !noswap, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, + .priority = DEF_PRIORITY, .target_mem_cgroup = memcg, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2546,8 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, } #endif -static void age_active_anon(struct zone *zone, struct scan_control *sc, - int priority) +static void age_active_anon(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2556,14 +2235,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); - if (inactive_anon_is_low(&mz)) - shrink_active_list(SWAP_CLUSTER_MAX, &mz, - sc, priority, 0); + if (inactive_anon_is_low(lruvec)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); @@ -2672,7 +2348,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int all_zones_ok; unsigned long balanced; - int priority; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; @@ -2696,18 +2371,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, }; loop_again: total_scanned = 0; + sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; - /* The swap token gets in the way of swapout... */ - if (!priority) - disable_swap_token(NULL); - all_zones_ok = 1; balanced = 0; @@ -2721,14 +2393,15 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - age_active_anon(zone, &sc, priority); + age_active_anon(zone, &sc); /* * If the number of buffer_heads in the machine @@ -2776,7 +2449,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; sc.nr_scanned = 0; @@ -2820,7 +2494,7 @@ loop_again: !zone_watermark_ok_safe(zone, testorder, high_wmark_pages(zone) + balance_gap, end_zone, 0)) { - shrink_zone(priority, zone, &sc); + shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); @@ -2877,7 +2551,7 @@ loop_again: * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. */ - if (total_scanned && (priority < DEF_PRIORITY - 2)) { + if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else @@ -2892,7 +2566,7 @@ loop_again: */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; - } + } while (--sc.priority >= 0); out: /* @@ -2942,7 +2616,8 @@ out: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* Would compaction fail due to lack of free memory? */ @@ -3209,6 +2884,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .nr_to_reclaim = nr_to_reclaim, .hibernation_mode = 1, .order = 0, + .priority = DEF_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3386,7 +3062,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) const unsigned long nr_pages = 1 << order; struct task_struct *p = current; struct reclaim_state reclaim_state; - int priority; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), @@ -3395,6 +3070,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .order = order, + .priority = ZONE_RECLAIM_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3417,11 +3093,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * Free memory by calling shrink zone with increasing * priorities until we have enough memory freed. */ - priority = ZONE_RECLAIM_PRIORITY; do { - shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && sc.nr_reclaimed < nr_pages); + shrink_zone(zone, &sc); + } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); @@ -3536,7 +3210,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) if (mapping_unevictable(page_mapping(page))) return 0; - if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) + if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page))) return 0; return 1; @@ -3572,6 +3246,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + lruvec = mem_cgroup_page_lruvec(page, zone); if (!PageLRU(page) || !PageUnevictable(page)) continue; @@ -3581,11 +3256,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) VM_BUG_ON(PageActive(page)); ClearPageUnevictable(page); - __dec_zone_state(zone, NR_UNEVICTABLE); - lruvec = mem_cgroup_lru_move_lists(zone, page, - LRU_UNEVICTABLE, lru); - list_move(&page->lru, &lruvec->lists[lru]); - __inc_zone_state(zone, NR_INACTIVE_ANON + lru); + del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, lru); pgrescued++; } } |