| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
| commit | 98931dd95fd489fcbfa97da563505a6f071d7c77 (patch) | |
| tree | 44683fc4a92efa614acdca2742a7ff19d26da1e3 /mm/migrate.c | |
| parent | df202b452fe6c6d6f1351bad485e2367ef1e644e (diff) | |
| parent | f403f22f8ccb12860b2b62fec3173c6ccd45938b (diff) | |
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
"Almost all of MM here. A few things are still getting finished off,
reviewed, etc.
- Yang Shi has improved the behaviour of khugepaged collapsing of
readonly file-backed transparent hugepages.
- Johannes Weiner has arranged for zswap memory use to be tracked and
managed on a per-cgroup basis.
- Muchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
runtime enablement of the recent huge page vmemmap optimization
feature (a brief usage sketch follows this quoted summary).
- Baolin Wang contributes a series to fix some issues around hugetlb
pagetable invalidation.
- Zhenwei Pi has fixed some interactions between hwpoisoned pages and
virtualization.
- Tong Tiangen has enabled the use of the presently x86-only
page_table_check debugging feature on arm64 and riscv.
- David Vernet has done some fixup work on the memcg selftests.
- Peter Xu has taught userfaultfd to handle write protection faults
against shmem- and hugetlbfs-backed files.
- More DAMON development from SeongJae Park - adding online tuning of
the feature and support for monitoring of fixed virtual address
ranges. Also easier discovery of which monitoring operations are
available.
- Nadav Amit has done some optimization of TLB flushing during
mprotect().
- Neil Brown continues to labor away at improving our swap-over-NFS
support.
- David Hildenbrand has some fixes to anon page COWing versus
get_user_pages().
- Peng Liu fixed some errors in the core hugetlb code.
- Joao Martins has reduced the amount of memory consumed by
device-dax's compound devmaps.
- Some cleanups of the arch-specific pagemap code from Anshuman
Khandual.
- Muchun Song has found and fixed some errors in the TLB flushing of
transparent hugepages.
- Roman Gushchin has done more work on the memcg selftests.
... and, of course, many smaller fixes and cleanups. Notably, the
customary flood of cleanup series from Miaohe Lin"
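The hugetlb_optimize_vmemmap knob mentioned above is toggled at runtime by writing to procfs. Below is a minimal userspace sketch, assuming the knob is exposed as /proc/sys/vm/hugetlb_optimize_vmemmap and accepts "0"/"1"; the exact path and accepted values are assumptions based on the summary above, not taken from this merge.

```c
/*
 * Hypothetical usage sketch, NOT part of the merged series: request
 * runtime enablement of the hugetlb vmemmap optimization. The knob path
 * and the "0"/"1" semantics are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *knob = "/proc/sys/vm/hugetlb_optimize_vmemmap"; /* assumed path */
	int fd = open(knob, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* "1" requests enablement; "0" would disable it (assumed semantics). */
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	printf("wrote 1 to %s\n", knob);
	return 0;
}
```

Where the corresponding vm sysctl exists, this is roughly equivalent to setting it with the sysctl tool; consult the documentation added by the series for the authoritative interface.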
* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
mm: kfence: use PAGE_ALIGNED helper
selftests: vm: add the "settings" file with timeout variable
selftests: vm: add "test_hmm.sh" to TEST_FILES
selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
selftests: vm: add migration to the .gitignore
selftests/vm/pkeys: fix typo in comment
ksm: fix typo in comment
selftests: vm: add process_mrelease tests
Revert "mm/vmscan: never demote for memcg reclaim"
mm/kfence: print disabling or re-enabling message
include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
mm: fix a potential infinite loop in start_isolate_page_range()
MAINTAINERS: add Muchun as co-maintainer for HugeTLB
zram: fix Kconfig dependency warning
mm/shmem: fix shmem folio swapoff hang
cgroup: fix an error handling path in alloc_pagecache_max_30M()
mm: damon: use HPAGE_PMD_SIZE
tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
nodemask.h: fix compilation error with GCC12
...
Diffstat (limited to 'mm/migrate.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/migrate.c | 194 |

1 file changed, 97 insertions(+), 97 deletions(-)
```diff
diff --git a/mm/migrate.c b/mm/migrate.c
index 21d82636c291..e51588e95f57 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -177,6 +177,7 @@ static bool remove_migration_pte(struct folio *folio,
         DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 
         while (page_vma_mapped_walk(&pvmw)) {
+                rmap_t rmap_flags = RMAP_NONE;
                 pte_t pte;
                 swp_entry_t entry;
                 struct page *new;
@@ -211,6 +212,9 @@ static bool remove_migration_pte(struct folio *folio,
                 else if (pte_swp_uffd_wp(*pvmw.pte))
                         pte = pte_mkuffd_wp(pte);
 
+                if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
+                        rmap_flags |= RMAP_EXCLUSIVE;
+
                 if (unlikely(is_device_private_page(new))) {
                         if (pte_write(pte))
                                 entry = make_writable_device_private_entry(
@@ -232,15 +236,17 @@ static bool remove_migration_pte(struct folio *folio,
                         pte = pte_mkhuge(pte);
                         pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
                         if (folio_test_anon(folio))
-                                hugepage_add_anon_rmap(new, vma, pvmw.address);
+                                hugepage_add_anon_rmap(new, vma, pvmw.address,
+                                                       rmap_flags);
                         else
-                                page_dup_rmap(new, true);
+                                page_dup_file_rmap(new, true);
                         set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                 } else
 #endif
                 {
                         if (folio_test_anon(folio))
-                                page_add_anon_rmap(new, vma, pvmw.address, false);
+                                page_add_anon_rmap(new, vma, pvmw.address,
+                                                   rmap_flags);
                         else
                                 page_add_file_rmap(new, vma, false);
                         set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
@@ -471,11 +477,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
         xas_lock_irq(&xas);
         expected_count = 2 + page_has_private(page);
-        if (page_count(page) != expected_count || xas_load(&xas) != page) {
-                xas_unlock_irq(&xas);
-                return -EAGAIN;
-        }
-
         if (!page_ref_freeze(page, expected_count)) {
                 xas_unlock_irq(&xas);
                 return -EAGAIN;
@@ -517,6 +518,12 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
                 folio_set_workingset(newfolio);
         if (folio_test_checked(folio))
                 folio_set_checked(newfolio);
+        /*
+         * PG_anon_exclusive (-> PG_mappedtodisk) is always migrated via
+         * migration entries. We can still have PG_anon_exclusive set on an
+         * effectively unmapped and unreferenced first sub-pages of an
+         * anonymous THP: we can simply copy it here via PG_mappedtodisk.
+         */
         if (folio_test_mappedtodisk(folio))
                 folio_set_mappedtodisk(newfolio);
 
@@ -836,21 +843,21 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  < 0 - error code
  *  MIGRATEPAGE_SUCCESS - success
  */
-static int move_to_new_page(struct page *newpage, struct page *page,
+static int move_to_new_folio(struct folio *dst, struct folio *src,
                                 enum migrate_mode mode)
 {
         struct address_space *mapping;
         int rc = -EAGAIN;
-        bool is_lru = !__PageMovable(page);
+        bool is_lru = !__PageMovable(&src->page);
 
-        VM_BUG_ON_PAGE(!PageLocked(page), page);
-        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+        VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
+        VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);
 
-        mapping = page_mapping(page);
+        mapping = folio_mapping(src);
 
         if (likely(is_lru)) {
                 if (!mapping)
-                        rc = migrate_page(mapping, newpage, page, mode);
+                        rc = migrate_page(mapping, &dst->page, &src->page, mode);
                 else if (mapping->a_ops->migratepage)
                         /*
                          * Most pages have a mapping and most filesystems
@@ -859,54 +866,54 @@ static int move_to_new_page(struct page *newpage, struct page *page,
                          * migratepage callback. This is the most common path
                          * for page migration.
                          */
-                        rc = mapping->a_ops->migratepage(mapping, newpage,
-                                                        page, mode);
+                        rc = mapping->a_ops->migratepage(mapping, &dst->page,
+                                                        &src->page, mode);
                 else
-                        rc = fallback_migrate_page(mapping, newpage,
-                                                        page, mode);
+                        rc = fallback_migrate_page(mapping, &dst->page,
+                                                        &src->page, mode);
         } else {
                 /*
                  * In case of non-lru page, it could be released after
                  * isolation step. In that case, we shouldn't try migration.
                  */
-                VM_BUG_ON_PAGE(!PageIsolated(page), page);
-                if (!PageMovable(page)) {
+                VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
+                if (!folio_test_movable(src)) {
                         rc = MIGRATEPAGE_SUCCESS;
-                        ClearPageIsolated(page);
+                        folio_clear_isolated(src);
                         goto out;
                 }
 
-                rc = mapping->a_ops->migratepage(mapping, newpage,
-                                                page, mode);
+                rc = mapping->a_ops->migratepage(mapping, &dst->page,
+                                                &src->page, mode);
                 WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
-                                !PageIsolated(page));
+                                !folio_test_isolated(src));
         }
 
         /*
-         * When successful, old pagecache page->mapping must be cleared before
-         * page is freed; but stats require that PageAnon be left as PageAnon.
+         * When successful, old pagecache src->mapping must be cleared before
+         * src is freed; but stats require that PageAnon be left as PageAnon.
          */
         if (rc == MIGRATEPAGE_SUCCESS) {
-                if (__PageMovable(page)) {
-                        VM_BUG_ON_PAGE(!PageIsolated(page), page);
+                if (__PageMovable(&src->page)) {
+                        VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
 
                         /*
                          * We clear PG_movable under page_lock so any compactor
                          * cannot try to migrate this page.
                          */
-                        ClearPageIsolated(page);
+                        folio_clear_isolated(src);
                 }
 
                 /*
-                 * Anonymous and movable page->mapping will be cleared by
+                 * Anonymous and movable src->mapping will be cleared by
                  * free_pages_prepare so don't reset it here for keeping
                  * the type to work PageAnon, for example.
                  */
-                if (!PageMappingFlags(page))
-                        page->mapping = NULL;
+                if (!folio_mapping_flags(src))
+                        src->mapping = NULL;
 
-                if (likely(!is_zone_device_page(newpage)))
-                        flush_dcache_folio(page_folio(newpage));
+                if (likely(!folio_is_zone_device(dst)))
+                        flush_dcache_folio(dst);
         }
 out:
         return rc;
@@ -994,7 +1001,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                 goto out_unlock;
 
         if (unlikely(!is_lru)) {
-                rc = move_to_new_page(newpage, page, mode);
+                rc = move_to_new_folio(dst, folio, mode);
                 goto out_unlock_both;
         }
 
@@ -1025,7 +1032,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         }
 
         if (!page_mapped(page))
-                rc = move_to_new_page(newpage, page, mode);
+                rc = move_to_new_folio(dst, folio, mode);
 
         /*
          * When successful, push newpage to LRU immediately: so that if it
@@ -1230,7 +1237,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                 goto put_anon;
 
         if (page_mapped(hpage)) {
-                bool mapping_locked = false;
                 enum ttu_flags ttu = 0;
 
                 if (!PageAnon(hpage)) {
@@ -1244,19 +1250,18 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                         if (unlikely(!mapping))
                                 goto unlock_put_anon;
 
-                        mapping_locked = true;
-                        ttu |= TTU_RMAP_LOCKED;
+                        ttu = TTU_RMAP_LOCKED;
                 }
 
                 try_to_migrate(src, ttu);
                 page_was_mapped = 1;
 
-                if (mapping_locked)
+                if (ttu & TTU_RMAP_LOCKED)
                         i_mmap_unlock_write(mapping);
         }
 
         if (!page_mapped(hpage))
-                rc = move_to_new_page(new_hpage, hpage, mode);
+                rc = move_to_new_folio(dst, src, mode);
 
         if (page_was_mapped)
                 remove_migration_ptes(src,
@@ -1412,14 +1417,11 @@ retry:
                                                 nr_thp_split++;
                                                 goto retry;
                                         }
-
-                                        nr_failed_pages += nr_subpages;
-                                        break;
-                                }
-
-                                /* Hugetlb migration is unsupported */
-                                if (!no_subpage_counting)
+                                } else if (!no_subpage_counting) {
                                         nr_failed++;
+                                }
 
+                                nr_failed_pages += nr_subpages;
                                 break;
                         case -ENOMEM:
@@ -1434,28 +1436,30 @@ retry:
                                                 nr_thp_split++;
                                                 goto retry;
                                         }
-
-                                        nr_failed_pages += nr_subpages;
-                                        goto out;
+                                } else if (!no_subpage_counting) {
+                                        nr_failed++;
                                 }
 
-                                if (!no_subpage_counting)
-                                        nr_failed++;
                                 nr_failed_pages += nr_subpages;
+                                /*
+                                 * There might be some subpages of fail-to-migrate THPs
+                                 * left in thp_split_pages list. Move them back to migration
+                                 * list so that they could be put back to the right list by
+                                 * the caller otherwise the page refcnt will be leaked.
+                                 */
+                                list_splice_init(&thp_split_pages, from);
+                                nr_thp_failed += thp_retry;
                                 goto out;
                         case -EAGAIN:
-                                if (is_thp) {
+                                if (is_thp)
                                         thp_retry++;
-                                        break;
-                                }
-                                retry++;
+                                else
+                                        retry++;
                                 break;
                         case MIGRATEPAGE_SUCCESS:
                                 nr_succeeded += nr_subpages;
-                                if (is_thp) {
+                                if (is_thp)
                                         nr_thp_succeeded++;
-                                        break;
-                                }
                                 break;
                         default:
                                 /*
@@ -1464,14 +1468,11 @@ retry:
                                  * removed from migration page list and not
                                  * retried in the next outer loop.
                                  */
-                                if (is_thp) {
+                                if (is_thp)
                                         nr_thp_failed++;
-                                        nr_failed_pages += nr_subpages;
-                                        break;
-                                }
-
-                                if (!no_subpage_counting)
+                                else if (!no_subpage_counting)
                                         nr_failed++;
+                                nr_failed_pages += nr_subpages;
                                 break;
                         }
@@ -1606,8 +1607,8 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 
         mmap_read_lock(mm);
         err = -EFAULT;
-        vma = find_vma(mm, addr);
-        if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+        vma = vma_lookup(mm, addr);
+        if (!vma || !vma_migratable(vma))
                 goto out;
 
         /* FOLL_DUMP to ignore special (like zero) pages */
@@ -1802,13 +1803,18 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
                         goto set_status;
 
                 /* FOLL_DUMP to ignore special (like zero) pages */
-                page = follow_page(vma, addr, FOLL_DUMP);
+                page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
 
                 err = PTR_ERR(page);
                 if (IS_ERR(page))
                         goto set_status;
 
-                err = page ? page_to_nid(page) : -ENOENT;
+                if (page) {
+                        err = page_to_nid(page);
+                        put_page(page);
+                } else {
+                        err = -ENOENT;
+                }
 set_status:
                 *status = err;
 
@@ -1844,16 +1850,12 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
                          const void __user * __user *pages,
                          int __user *status)
 {
-#define DO_PAGES_STAT_CHUNK_NR 16
+#define DO_PAGES_STAT_CHUNK_NR 16UL
         const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
         int chunk_status[DO_PAGES_STAT_CHUNK_NR];
 
         while (nr_pages) {
-                unsigned long chunk_nr;
-
-                chunk_nr = nr_pages;
-                if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
-                        chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+                unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR);
 
                 if (in_compat_syscall()) {
                         if (get_compat_pages_array(chunk_pages, pages,
@@ -1969,7 +1971,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Returns true if this is a safe migration target node for misplaced NUMA
- * pages. Currently it only checks the watermarks which crude
+ * pages. Currently it only checks the watermarks which is crude.
  */
 static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
                                    unsigned long nr_migrate_pages)
@@ -1979,7 +1981,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
         for (z = pgdat->nr_zones - 1; z >= 0; z--) {
                 struct zone *zone = pgdat->node_zones + z;
 
-                if (!populated_zone(zone))
+                if (!managed_zone(zone))
                         continue;
 
                 /* Avoid waking kswapd by allocating pages_to_migrate pages. */
@@ -2015,7 +2017,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
-        int page_lru;
         int nr_pages = thp_nr_pages(page);
         int order = compound_order(page);
 
@@ -2032,7 +2033,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
                 if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
                         return 0;
                 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
-                        if (populated_zone(pgdat->node_zones + z))
+                        if (managed_zone(pgdat->node_zones + z))
                                 break;
                 }
                 wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE);
@@ -2042,8 +2043,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
         if (isolate_lru_page(page))
                 return 0;
 
-        page_lru = page_is_file_lru(page);
-        mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
+        mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page),
                             nr_pages);
 
         /*
@@ -2116,7 +2116,6 @@ out:
         return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_NUMA */
 
 /*
  * node_demotion[] example:
@@ -2250,7 +2249,6 @@ out:
         return target;
 }
 
-#if defined(CONFIG_HOTPLUG_CPU)
 /* Disable reclaim-based migration. */
 static void __disable_all_migrate_targets(void)
 {
@@ -2353,8 +2351,8 @@ out_clear:
  */
 static void __set_migration_target_nodes(void)
 {
-        nodemask_t next_pass = NODE_MASK_NONE;
-        nodemask_t this_pass = NODE_MASK_NONE;
+        nodemask_t next_pass;
+        nodemask_t this_pass;
         nodemask_t used_targets = NODE_MASK_NONE;
         int node, best_distance;
 
@@ -2443,6 +2441,7 @@ void set_migration_target_nodes(void)
  * __set_migration_target_nodes() can be used as opposed to
  * set_migration_target_nodes().
  */
+#ifdef CONFIG_MEMORY_HOTPLUG
 static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
                                                  unsigned long action, void *_arg)
 {
@@ -2488,15 +2487,17 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
 
         return notifier_from_errno(0);
 }
+#endif
 
 void __init migrate_on_reclaim_init(void)
 {
-        node_demotion = kmalloc_array(nr_node_ids,
-                                      sizeof(struct demotion_nodes),
-                                      GFP_KERNEL);
+        node_demotion = kcalloc(nr_node_ids,
+                                sizeof(struct demotion_nodes),
+                                GFP_KERNEL);
         WARN_ON(!node_demotion);
-
+#ifdef CONFIG_MEMORY_HOTPLUG
         hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+#endif
         /*
          * At this point, all numa nodes with memory/CPus have their state
          * properly set, so we can build the demotion order now.
@@ -2507,7 +2508,6 @@ void __init migrate_on_reclaim_init(void)
         set_migration_target_nodes();
         cpus_read_unlock();
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 bool numa_demotion_enabled = false;
 
@@ -2523,12 +2523,11 @@ static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
                                            struct kobj_attribute *attr,
                                            const char *buf, size_t count)
 {
-        if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
-                numa_demotion_enabled = true;
-        else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
-                numa_demotion_enabled = false;
-        else
-                return -EINVAL;
+        ssize_t ret;
+
+        ret = kstrtobool(buf, &numa_demotion_enabled);
+        if (ret)
+                return ret;
 
         return count;
 }
@@ -2568,4 +2567,5 @@ delete_obj:
         return err;
 }
 subsys_initcall(numa_init_sysfs);
-#endif
+#endif /* CONFIG_SYSFS */
+#endif /* CONFIG_NUMA */
```
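The do_pages_stat() hunk above replaces an open-coded clamp with min(nr_pages, DO_PAGES_STAT_CHUNK_NR) and changes the constant to 16UL, so that both arguments of the kernel's type-checked min() share the unsigned long type of nr_pages. A minimal userspace sketch of the same chunked-processing pattern follows; all names here are hypothetical and not taken from the kernel.

```c
/*
 * Userspace sketch (hypothetical names, not kernel code) of the chunked
 * processing pattern used by do_pages_stat(): take at most CHUNK_NR items
 * per pass, where the bound has the same unsigned long type as the count.
 */
#include <stdio.h>

#define CHUNK_NR 16UL                   /* same type as the remaining count */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static void process_chunk(const int *items, unsigned long n)
{
	for (unsigned long i = 0; i < n; i++)
		printf("%d ", items[i]);
	printf("\n");
}

int main(void)
{
	int items[40];
	unsigned long nr_items = sizeof(items) / sizeof(items[0]);
	const int *cursor = items;

	for (unsigned long i = 0; i < nr_items; i++)
		items[i] = (int)i;

	while (nr_items) {
		unsigned long chunk = min_ul(nr_items, CHUNK_NR);

		process_chunk(cursor, chunk);   /* one bounded batch per loop */
		cursor += chunk;
		nr_items -= chunk;
	}
	return 0;
}
```

The design point is simply that the chunk bound and the remaining count share a type, so the comparison needs no casts; in the kernel this is what lets the type-checked min() accept the constant.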