summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/gup.c 4
-rw-r--r-- mm/hugetlb_vmemmap.c 6
-rw-r--r-- mm/internal.h 1
-rw-r--r-- mm/kasan/kasan_test_c.c 20
-rw-r--r-- mm/memcontrol-v1.c 2
-rw-r--r-- mm/memory.c 4
-rw-r--r-- mm/mm_init.c 1
-rw-r--r-- mm/page_alloc.c 40
-rw-r--r-- mm/vmscan.c 29
9 files changed, 82 insertions, 25 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 92351e2fa876..84461d384ae2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2207,8 +2207,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
} while (start != end);
mmap_read_unlock(mm);
- if (size > (unsigned long)uaddr - start)
- return size - ((unsigned long)uaddr - start);
+ if (size > start - (unsigned long)uaddr)
+ return size - (start - (unsigned long)uaddr);
return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 9a99dfa3c495..27245e86df25 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -238,11 +238,11 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
* struct page, the special metadata (e.g. page->flags or page->mapping)
* cannot copy to the tail struct page structs. The invalid value will be
* checked in the free_tail_page_prepare(). In order to avoid the message
- * of "corrupted mapping in tail page". We need to reset at least 3 (one
- * head struct page struct and two tail struct page structs) struct page
+ * of "corrupted mapping in tail page". We need to reset at least 4 (one
+ * head struct page struct and three tail struct page structs) struct page
* structs.
*/
-#define NR_RESET_STRUCT_PAGE 3
+#define NR_RESET_STRUCT_PAGE 4
static inline void reset_struct_pages(struct page *start)
{
diff --git a/mm/internal.h b/mm/internal.h
index 50c2f590b2d0..e9695baa5922 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1595,6 +1595,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY
void accept_page(struct page *page);
+void unaccepted_cleanup_work(struct work_struct *work);
#else /* CONFIG_UNACCEPTED_MEMORY */
static inline void accept_page(struct page *page)
{
diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c
index f24e3bef72a4..5f922dd38ffa 100644
--- a/mm/kasan/kasan_test_c.c
+++ b/mm/kasan/kasan_test_c.c
@@ -1567,6 +1567,7 @@ static void kasan_memcmp(struct kunit *test)
static void kasan_strings(struct kunit *test)
{
char *ptr;
+ char *src;
size_t size = 24;
/*
@@ -1578,6 +1579,25 @@ static void kasan_strings(struct kunit *test)
ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO);
+ strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE);
+
+ /*
+ * Make sure that strscpy() does not trigger KASAN if it overreads into
+ * poisoned memory.
+ *
+ * The expected size does not include the terminator '\0'
+ * so it is (KASAN_GRANULE_SIZE - 2) ==
+ * KASAN_GRANULE_SIZE - ("initial removed character" + "\0").
+ */
+ KUNIT_EXPECT_EQ(test, KASAN_GRANULE_SIZE - 2,
+ strscpy(ptr, src + 1, KASAN_GRANULE_SIZE));
+
+ /* strscpy should fail if the first byte is unreadable. */
+ KUNIT_EXPECT_KASAN_FAIL(test, strscpy(ptr, src + KASAN_GRANULE_SIZE,
+ KASAN_GRANULE_SIZE));
+
+ kfree(src);
kfree(ptr);
/*
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 8660908850dc..4a9cf27a70af 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -620,7 +620,7 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry)
mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
- swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
+ swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
folio_unqueue_deferred_split(folio);
folio->memcg_data = 0;
diff --git a/mm/memory.c b/mm/memory.c
index 44481fe7c629..ba3ea0a82f7f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3734,8 +3734,6 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
return false;
VM_WARN_ON_ONCE(folio_test_ksm(folio));
- VM_WARN_ON_ONCE(folio_mapcount(folio) > folio_nr_pages(folio));
- VM_WARN_ON_ONCE(folio_entire_mapcount(folio));
if (unlikely(folio_test_swapcache(folio))) {
/*
@@ -3760,6 +3758,8 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
if (folio_large_mapcount(folio) != folio_ref_count(folio))
goto unlock;
+ VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_nr_pages(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_entire_mapcount(folio), folio);
VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id &&
folio_mm_id(folio, 1) != vma->vm_mm->mm_id);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 84f14fa12d0d..9659689b8ace 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,6 +1441,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY
INIT_LIST_HEAD(&zone->unaccepted_pages);
+ INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
#endif
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1715e34b91af..5669baf2a6fe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3470,18 +3470,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
return false;
}
-bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
- unsigned long mark, int highest_zoneidx)
-{
- long free_pages = zone_page_state(z, NR_FREE_PAGES);
-
- if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
- free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
-
- return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
- free_pages);
-}
-
#ifdef CONFIG_NUMA
int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
@@ -7191,6 +7179,11 @@ static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
static bool lazy_accept = true;
+void unaccepted_cleanup_work(struct work_struct *work)
+{
+ static_branch_dec(&zones_with_unaccepted_pages);
+}
+
static int __init accept_memory_parse(char *p)
{
if (!strcmp(p, "lazy")) {
@@ -7229,8 +7222,27 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
- if (last)
- static_branch_dec(&zones_with_unaccepted_pages);
+ if (last) {
+ /*
+ * There are two corner cases:
+ *
+ * - If allocation occurs during the CPU bring up,
+ * static_branch_dec() cannot be used directly as
+ * it causes a deadlock on cpu_hotplug_lock.
+ *
+ * Instead, use schedule_work() to prevent deadlock.
+ *
+ * - If allocation occurs before workqueues are initialized,
+ * static_branch_dec() should be called directly.
+ *
+ * Workqueues are initialized before CPU bring up, so this
+ * will not conflict with the first scenario.
+ */
+ if (system_wq)
+ schedule_work(&zone->unaccepted_cleanup);
+ else
+ unaccepted_cleanup_work(&zone->unaccepted_cleanup);
+ }
}
void accept_page(struct page *page)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b620d74b0f66..3783e45bfc92 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6736,6 +6736,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
* meet watermarks.
*/
for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) {
+ enum zone_stat_item item;
unsigned long free_pages;
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
@@ -6746,11 +6747,33 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
/*
* In defrag_mode, watermarks must be met in whole
* blocks to avoid polluting allocator fallbacks.
+ *
+ * However, kswapd usually cannot accomplish this on
+ * its own and needs kcompactd support. Once it's
+ * reclaimed a compaction gap, and kswapd_shrink_node
+ * has dropped order, simply ensure there are enough
+ * base pages for compaction, wake kcompactd & sleep.
*/
- if (defrag_mode)
- free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
+ if (defrag_mode && order)
+ item = NR_FREE_PAGES_BLOCKS;
else
- free_pages = zone_page_state(zone, NR_FREE_PAGES);
+ item = NR_FREE_PAGES;
+
+ /*
+ * When there is a high number of CPUs in the system,
+ * the cumulative error from the vmstat per-cpu cache
+ * can blur the line between the watermarks. In that
+ * case, be safe and get an accurate snapshot.
+ *
+ * TODO: NR_FREE_PAGES_BLOCKS moves in steps of
+ * pageblock_nr_pages, while the vmstat pcp threshold
+ * is limited to 125. On many configurations that
+ * counter won't actually be per-cpu cached. But keep
+ * things simple for now; revisit when somebody cares.
+ */
+ free_pages = zone_page_state(zone, item);
+ if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark)
+ free_pages = zone_page_state_snapshot(zone, item);
if (__zone_watermark_ok(zone, order, mark, highest_zoneidx,
0, free_pages))