From e755c43eb4a33a29f92bca6df30e5a558845c2f7 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 9 Jan 2024 14:31:19 -0800 Subject: maple_tree: fix comment describing mas_node_count_gfp() The function description comment for mas_node_count_gfp() mistakenly refers to the function as mas_node_count(). Change it to refer to the correct function. Link: https://lkml.kernel.org/r/20240109223119.162357-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Peng Zhang Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- lib/maple_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 6f241bb38799..7b161802860b 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1307,8 +1307,8 @@ static inline void mas_free(struct ma_state *mas, struct maple_enode *used) } /* - * mas_node_count() - Check if enough nodes are allocated and request more if - * there is not enough nodes. + * mas_node_count_gfp() - Check if enough nodes are allocated and request more + * if there is not enough nodes. * @mas: The maple state * @count: The number of nodes needed * @gfp: the gfp flags -- cgit v1.2.3 From 8689d750006bbd811423dd41ed5efcd8a029862c Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 22 Jan 2024 11:20:00 +0100 Subject: maple_tree: avoid duplicate variable init in mast_spanning_rebalance() The local variables r_tmp and l_tmp in mast_spanning_rebalance() are already initialized at their declarations; there is no need to assign the values again. Remove the duplicate initialization of {r,l}_tmp. No functional change. Due to common compiler optimizations, also no change to object code. This issue was identified with clang-analyzer's dead stores analysis. Link: https://lkml.kernel.org/r/20240122102000.29558-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 7b161802860b..82fb5195c235 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2271,8 +2271,6 @@ bool mast_spanning_rebalance(struct maple_subtree_state *mast) struct ma_state l_tmp = *mast->orig_l; unsigned char depth = 0; - r_tmp = *mast->orig_r; - l_tmp = *mast->orig_l; do { mas_ascend(mast->orig_r); mas_ascend(mast->orig_l); -- cgit v1.2.3 From a60cc288a1a2604bd86d3df129f269887018c3cb Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 31 Jan 2024 14:51:24 -0800 Subject: test_xarray: add tests for advanced multi-index use Patch series "test_xarray: advanced API multi-index tests", v2. This is a respin of the test_xarray multi-index tests [0], which use and demonstrate the advanced API that is used by the page cache. This should let folks more easily follow how we use multi-index to support, for example, a min order later in the page cache. It also lets us grow the selftests to mimic more of what we do in the page cache. This patch (of 2): The multi-index selftests are great, but they don't replicate exactly how we deal with the page cache, which makes them a bit hard to follow, as the page cache uses the advanced API. Add tests which use the advanced API, mimicking what we do in the page cache; while at it, extend the example to do what is needed for min order support. 
[mcgrof@kernel.org: fix soft lockup for advanced-api tests] Link: https://lkml.kernel.org/r/20240216194329.840555-1-mcgrof@kernel.org [akpm@linux-foundation.org: s/i/loops/, make non-static] [akpm@linux-foundation.org: restore static storage for loop counter] Link: https://lkml.kernel.org/r/20240131225125.1370598-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20240131225125.1370598-2-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Tested-by: Daniel Gomez Cc: Darrick J. Wong Cc: Dave Chinner Cc: Hannes Reinecke Cc: Matthew Wilcox Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- lib/test_xarray.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index e77d4856442c..1050e9113d2a 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -674,6 +674,181 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +#ifdef CONFIG_XARRAY_MULTI +/* mimics page cache __filemap_add_folio() */ +static noinline void check_xa_multi_store_adv_add(struct xarray *xa, + unsigned long index, + unsigned int order, + void *p) +{ + XA_STATE(xas, xa, index); + unsigned int nrpages = 1UL << order; + + /* users are responsible for index alignemnt to the order when adding */ + XA_BUG_ON(xa, index & (nrpages - 1)); + + xas_set_order(&xas, index, order); + + do { + xas_lock_irq(&xas); + + xas_store(&xas, p); + XA_BUG_ON(xa, xas_error(&xas)); + XA_BUG_ON(xa, xa_load(xa, index) != p); + + xas_unlock_irq(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, xas_error(&xas)); +} + +/* mimics page_cache_delete() */ +static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa, + unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + + xas_set_order(&xas, index, order); + xas_store(&xas, NULL); + xas_init_marks(&xas); +} + +static noinline void check_xa_multi_store_adv_delete(struct xarray *xa, + unsigned long index, + unsigned int order) +{ + xa_lock_irq(xa); + check_xa_multi_store_adv_del_entry(xa, index, order); + xa_unlock_irq(xa); +} + +/* mimics page cache filemap_get_entry() */ +static noinline void *test_get_entry(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *p; + static unsigned int loops = 0; + + rcu_read_lock(); +repeat: + xas_reset(&xas); + p = xas_load(&xas); + if (xas_retry(&xas, p)) + goto repeat; + rcu_read_unlock(); + + /* + * This is not part of the page cache, this selftest is pretty + * aggressive and does not want to trust the xarray API but rather + * test it, and for order 20 (4 GiB block size) we can loop over + * over a million entries which can cause a soft lockup. Page cache + * APIs won't be stupid, proper page cache APIs loop over the proper + * order so when using a larger order we skip shared entries. 
+ */ + if (++loops % XA_CHECK_SCHED == 0) + schedule(); + + return p; +} + +static unsigned long some_val = 0xdeadbeef; +static unsigned long some_val_2 = 0xdeaddead; + +/* mimics the page cache usage */ +static noinline void check_xa_multi_store_adv(struct xarray *xa, + unsigned long pos, + unsigned int order) +{ + unsigned int nrpages = 1UL << order; + unsigned long index, base, next_index, next_next_index; + unsigned int i; + + index = pos >> PAGE_SHIFT; + base = round_down(index, nrpages); + next_index = round_down(base + nrpages, nrpages); + next_next_index = round_down(next_index + nrpages, nrpages); + + check_xa_multi_store_adv_add(xa, base, order, &some_val); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val); + + XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL); + + /* Use order 0 for the next item */ + check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2); + XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2); + + /* Remove the next item */ + check_xa_multi_store_adv_delete(xa, next_index, 0); + + /* Now use order for a new pointer */ + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2); + + check_xa_multi_store_adv_delete(xa, next_index, order); + check_xa_multi_store_adv_delete(xa, base, order); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* starting fresh again */ + + /* let's test some holes now */ + + /* hole at base and next_next */ + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL); + + check_xa_multi_store_adv_delete(xa, next_index, order); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* hole at base and next */ + + check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != NULL); + + for (i = 0; i < nrpages; i++) + XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2); + + check_xa_multi_store_adv_delete(xa, next_next_index, order); + XA_BUG_ON(xa, !xa_empty(xa)); +} +#endif + +static noinline void check_multi_store_advanced(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned long end = ULONG_MAX/2; + unsigned long pos, i; + + /* + * About 117 million tests below. 
+ */ + for (pos = 7; pos < end; pos = (pos * pos) + 564) { + for (i = 0; i < max_order; i++) { + check_xa_multi_store_adv(xa, pos, i); + check_xa_multi_store_adv(xa, pos + 157, i); + } + } +#endif +} + static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; @@ -1804,6 +1979,7 @@ static int xarray_checks(void) check_reserve(&array); check_reserve(&xa0); check_multi_store(&array); + check_multi_store_advanced(&array); check_get_order(&array); check_xa_alloc(); check_find(&array); -- cgit v1.2.3 From e777ae44e33e48ad01bfdc978076b03c1f091b4f Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Wed, 31 Jan 2024 14:51:25 -0800 Subject: XArray: add cmpxchg order test XArray multi-index entries do not keep track of the order stored once the entry is being marked as used with cmpxchg (conditionally replaced with NULL). Add a test to check the order is actually lost. The test also verifies the order and entries for all the tied indexes before and after the NULL replacement with xa_cmpxchg. Add another entry at 1 << order that keeps the node around and the order information for the NULL-entry after xa_cmpxchg. Link: https://lkml.kernel.org/r/20240131225125.1370598-3-mcgrof@kernel.org Signed-off-by: Daniel Gomez Signed-off-by: Luis Chamberlain Cc: Darrick J. Wong Cc: Dave Chinner Cc: Hannes Reinecke Cc: Matthew Wilcox Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- lib/test_xarray.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 1050e9113d2a..ebe2af2e072d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -423,6 +423,59 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_cmpxchg_order(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + void *FIVE = xa_mk_value(5); + unsigned int i, order = 3; + + XA_BUG_ON(xa, xa_store_order(xa, 0, order, FIVE, GFP_KERNEL)); + + /* Check entry FIVE has the order saved */ + XA_BUG_ON(xa, xa_get_order(xa, xa_to_value(FIVE)) != order); + + /* Check all the tied indexes have the same entry and order */ + for (i = 0; i < (1 << order); i++) { + XA_BUG_ON(xa, xa_load(xa, i) != FIVE); + XA_BUG_ON(xa, xa_get_order(xa, i) != order); + } + + /* Ensure that nothing is stored at index '1 << order' */ + XA_BUG_ON(xa, xa_load(xa, 1 << order) != NULL); + + /* + * Additionally, keep the node information and the order at + * '1 << order' + */ + XA_BUG_ON(xa, xa_store_order(xa, 1 << order, order, FIVE, GFP_KERNEL)); + for (i = (1 << order); i < (1 << order) + (1 << order) - 1; i++) { + XA_BUG_ON(xa, xa_load(xa, i) != FIVE); + XA_BUG_ON(xa, xa_get_order(xa, i) != order); + } + + /* Conditionally replace FIVE entry at index '0' with NULL */ + XA_BUG_ON(xa, xa_cmpxchg(xa, 0, FIVE, NULL, GFP_KERNEL) != FIVE); + + /* Verify the order is lost at FIVE (and old) entries */ + XA_BUG_ON(xa, xa_get_order(xa, xa_to_value(FIVE)) != 0); + + /* Verify the order and entries are lost in all the tied indexes */ + for (i = 0; i < (1 << order); i++) { + XA_BUG_ON(xa, xa_load(xa, i) != NULL); + XA_BUG_ON(xa, xa_get_order(xa, i) != 0); + } + + /* Verify node and order are kept at '1 << order' */ + for (i = (1 << order); i < (1 << order) + (1 << order) - 1; i++) { + XA_BUG_ON(xa, xa_load(xa, i) != FIVE); + XA_BUG_ON(xa, xa_get_order(xa, i) != order); + } + + xa_store_order(xa, 0, BITS_PER_LONG - 1, NULL, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); +#endif +} + static noinline void 
check_reserve(struct xarray *xa) { void *entry; @@ -1976,6 +2029,7 @@ static int xarray_checks(void) check_xas_erase(&array); check_insert(&array); check_cmpxchg(&array); + check_cmpxchg_order(&array); check_reserve(&array); check_reserve(&xa0); check_multi_store(&array); -- cgit v1.2.3 From 3ee34eabac2abb6b1b6fcdebffe18870719ad000 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Feb 2024 22:59:01 +0100 Subject: lib/stackdepot: fix first entry having a 0-handle Patch series "page_owner: print stacks and their outstanding allocations", v10. page_owner is a great debugging tool that lets us know about all pages that have been allocated/freed and their specific stacktrace. This comes in very handy when debugging memory leaks, since with some scripting we can see the outstanding allocations, which might point to a memory leak. In my experience, that is one of the most useful cases, but it can get really tedious to screen through all pages and try to reconstruct the stack <-> allocated/freed relationship, becoming most of the time a daunting and slow process when we have tons of allocation/free operations. This patchset aims to ease that by adding a new functionality into page_owner. This functionality creates a new directory called 'page_owner_stacks' under '/sys/kernel/debug' with a read-only file called 'show_stacks', which prints out all the stacks followed by their outstanding number of allocations (that is, the number of times the stacktrace has allocated but not yet freed). This gives us a clear and quick overview of stacks <-> allocated/free. We take advantage of the new refcount_t field that the stack_record struct gained, and increment/decrement the stack refcount on every __set_page_owner() (alloc operation) and __reset_page_owner() (free operation) call. Unfortunately, we cannot use the new stackdepot API STACK_DEPOT_FLAG_GET because it does not fulfill page_owner's needs, meaning we would have to special-case things, at which point it makes more sense for page_owner to do its own {dec,inc}rementing of the stacks. E.g., using STACK_DEPOT_FLAG_PUT, once the refcount reaches 0, such a stack gets evicted, so page_owner would lose information. This patchset also creates a new file called 'set_threshold' within the 'page_owner_stacks' directory, and by writing a value to it, the stacks whose refcount is below that value will be filtered out. A PoC can be found below: # cat /sys/kernel/debug/page_owner_stacks/show_stacks > page_owner_full_stacks.txt # head -40 page_owner_full_stacks.txt prep_new_page+0xa9/0x120 get_page_from_freelist+0x801/0x2210 __alloc_pages+0x18b/0x350 alloc_pages_mpol+0x91/0x1f0 folio_alloc+0x14/0x50 filemap_alloc_folio+0xb2/0x100 page_cache_ra_unbounded+0x96/0x180 filemap_get_pages+0xfd/0x590 filemap_read+0xcc/0x330 blkdev_read_iter+0xb8/0x150 vfs_read+0x285/0x320 ksys_read+0xa5/0xe0 do_syscall_64+0x80/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 stack_count: 521 prep_new_page+0xa9/0x120 get_page_from_freelist+0x801/0x2210 __alloc_pages+0x18b/0x350 alloc_pages_mpol+0x91/0x1f0 folio_alloc+0x14/0x50 filemap_alloc_folio+0xb2/0x100 __filemap_get_folio+0x14a/0x490 ext4_write_begin+0xbd/0x4b0 [ext4] generic_perform_write+0xc1/0x1e0 ext4_buffered_write_iter+0x68/0xe0 [ext4] ext4_file_write_iter+0x70/0x740 [ext4] vfs_write+0x33d/0x420 ksys_write+0xa5/0xe0 do_syscall_64+0x80/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 stack_count: 4609 ... ... 
# echo 5000 > /sys/kernel/debug/page_owner_stacks/set_threshold # cat /sys/kernel/debug/page_owner_stacks/show_stacks > page_owner_full_stacks_5000.txt # head -40 page_owner_full_stacks_5000.txt prep_new_page+0xa9/0x120 get_page_from_freelist+0x801/0x2210 __alloc_pages+0x18b/0x350 alloc_pages_mpol+0x91/0x1f0 folio_alloc+0x14/0x50 filemap_alloc_folio+0xb2/0x100 __filemap_get_folio+0x14a/0x490 ext4_write_begin+0xbd/0x4b0 [ext4] generic_perform_write+0xc1/0x1e0 ext4_buffered_write_iter+0x68/0xe0 [ext4] ext4_file_write_iter+0x70/0x740 [ext4] vfs_write+0x33d/0x420 ksys_pwrite64+0x75/0x90 do_syscall_64+0x80/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 stack_count: 6781 prep_new_page+0xa9/0x120 get_page_from_freelist+0x801/0x2210 __alloc_pages+0x18b/0x350 pcpu_populate_chunk+0xec/0x350 pcpu_balance_workfn+0x2d1/0x4a0 process_scheduled_works+0x84/0x380 worker_thread+0x12a/0x2a0 kthread+0xe3/0x110 ret_from_fork+0x30/0x50 ret_from_fork_asm+0x1b/0x30 stack_count: 8641 This patch (of 7): The very first entry of stack_record gets a handle of 0, but this is wrong because stackdepot treats a 0-handle as a non-valid one. E.g., see the check in stack_depot_fetch(). Fix this by adding an offset of 1. This bug has been lurking since the very beginning of stackdepot, but no one really cared, it seems. Because of that I am not adding a Fixes tag. Link: https://lkml.kernel.org/r/20240215215907.20121-1-osalvador@suse.de Link: https://lkml.kernel.org/r/20240215215907.20121-2-osalvador@suse.de Co-developed-by: Marco Elver Signed-off-by: Marco Elver Signed-off-by: Oscar Salvador Acked-by: Vlastimil Babka Acked-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Michal Hocko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4a7055a63d9f..c043a4186bc5 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -45,15 +45,16 @@ #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ STACK_DEPOT_EXTRA_BITS) #define DEPOT_POOLS_CAP 8192 +/* The pool_index is offset by 1 so the first record does not have a 0 handle. */ #define DEPOT_MAX_POOLS \ - (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ - (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) + (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ + (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) /* Compact structure that stores a reference to a stack. */ union handle_parts { depot_stack_handle_t handle; struct { - u32 pool_index : DEPOT_POOL_INDEX_BITS; + u32 pool_index : DEPOT_POOL_INDEX_BITS; /* pool_index is offset by 1 */ u32 offset : DEPOT_OFFSET_BITS; u32 extra : STACK_DEPOT_EXTRA_BITS; }; @@ -372,7 +373,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) stack = current_pool + pool_offset; /* Pre-initialize handle once. 
*/ - stack->handle.pool_index = pool_index; + stack->handle.pool_index = pool_index + 1; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; INIT_LIST_HEAD(&stack->hash_list); @@ -483,18 +484,19 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; + u32 pool_index = parts.pool_index - 1; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; lockdep_assert_not_held(&pool_lock); - if (parts.pool_index > pools_num_cached) { + if (pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num_cached, handle); + pool_index, pools_num_cached, handle); return NULL; } - pool = stack_pools[parts.pool_index]; + pool = stack_pools[pool_index]; if (WARN_ON(!pool)) return NULL; -- cgit v1.2.3 From 8151c7a35d8bd8a12e93538ef7963ea209b6ab41 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Feb 2024 22:59:02 +0100 Subject: lib/stackdepot: move stack_record struct definition into the header In order to move the heavy lifting into page_owner code, this one needs to have access to the stack_record structure, which right now sits in lib/stackdepot.c. Move it to the stackdepot.h header so page_owner can access stack_record's struct fields. Link: https://lkml.kernel.org/r/20240215215907.20121-3-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: Marco Elver Reviewed-by: Vlastimil Babka Acked-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Michal Hocko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index c043a4186bc5..514b8d40ff57 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,55 +36,12 @@ #include #include -#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) - -#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ -#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) -#define DEPOT_STACK_ALIGN 4 -#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) -#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ - STACK_DEPOT_EXTRA_BITS) #define DEPOT_POOLS_CAP 8192 /* The pool_index is offset by 1 so the first record does not have a 0 handle. */ #define DEPOT_MAX_POOLS \ (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) -/* Compact structure that stores a reference to a stack. */ -union handle_parts { - depot_stack_handle_t handle; - struct { - u32 pool_index : DEPOT_POOL_INDEX_BITS; /* pool_index is offset by 1 */ - u32 offset : DEPOT_OFFSET_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; - }; -}; - -struct stack_record { - struct list_head hash_list; /* Links in the hash table */ - u32 hash; /* Hash in hash table */ - u32 size; /* Number of stored frames */ - union handle_parts handle; /* Constant after initialization */ - refcount_t count; - union { - unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ - struct { - /* - * An important invariant of the implementation is to - * only place a stack record onto the freelist iff its - * refcount is zero. Because stack records with a zero - * refcount are never considered as valid, it is safe to - * union @entries and freelist management state below. 
- * Conversely, as soon as an entry is off the freelist - * and its refcount becomes non-zero, the below must not - * be accessed until being placed back on the freelist. - */ - struct list_head free_list; /* Links in the freelist */ - unsigned long rcu_state; /* RCU cookie */ - }; - }; -}; - static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; -- cgit v1.2.3 From 4bedfb314bdd85c1662ecc46fa25b33b998f994d Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Feb 2024 22:59:03 +0100 Subject: mm,page_owner: maintain own list of stack_records structs page_owner needs to increment a stack_record refcount when a new allocation occurs, and decrement it on a free operation. In order to do that, we need to have a way to get a stack_record from a handle. Implement __stack_depot_get_stack_record(), which does just that, and make it public so page_owner can use it. Also, traversing all stackdepot buckets comes with its own complexity, plus we would have to implement a way to mark only those stack_records that originated from page_owner, as those are the ones we are interested in. For that reason, page_owner maintains its own list of stack_records, because traversing that list is faster than traversing all buckets, while at the same time keeping the complexity low. For now, add to stack_list only the stack_records of dummy_handle and failure_handle, and set their refcount to 1. Further patches will add code to increment or decrement stack_record counts on allocation and free operations. Link: https://lkml.kernel.org/r/20240215215907.20121-4-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: Vlastimil Babka Reviewed-by: Marco Elver Acked-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Michal Hocko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 514b8d40ff57..8c795bb20afb 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -687,6 +687,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_depot_save); +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle) +{ + if (!handle) + return NULL; + + return depot_fetch_stack(handle); +} + unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { -- cgit v1.2.3 From 443cbaf9e2fdbef7d7cae457434a6cb8a679441b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Jan 2024 13:12:42 +0800 Subject: crash: split vmcoreinfo exporting code out from crash_core.c Now move the relevant code into separate files: kernel/crash_reserve.c, include/linux/crash_reserve.h. Add the config item CRASH_RESERVE to control its enabling. Also update the old ifdeffery of CONFIG_CRASH_CORE, including of <linux/crash_core.h> and the config item dependency on CRASH_CORE, accordingly. Also do the following renaming: - arch/xxx/kernel/{crash_core.c => vmcore_info.c} because they are only related to vmcoreinfo exporting on x86, arm64, riscv. Finally, remove the config item CRASH_CORE and rely on CONFIG_KEXEC_CORE to decide whether to build in crash_core.c. 
[yang.lee@linux.alibaba.com: remove duplicated include in vmcore_info.c] Link: https://lkml.kernel.org/r/20240126005744.16561-1-yang.lee@linux.alibaba.com Link: https://lkml.kernel.org/r/20240124051254.67105-3-bhe@redhat.com Signed-off-by: Baoquan He Signed-off-by: Yang Li Acked-by: Hari Bathini Cc: Al Viro Cc: Eric W. Biederman Cc: Pingfan Liu Cc: Klara Modin Cc: Michael Kelley Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Yang Li Signed-off-by: Andrew Morton --- lib/buildid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/buildid.c b/lib/buildid.c index e3a7acdeef0e..3e6868c86b45 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -174,7 +174,7 @@ int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size) return parse_build_id_buf(build_id, NULL, buf, buf_size); } -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init; /** -- cgit v1.2.3 From dc24559472a682eb124e869cb110e7a2fd857322 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 Feb 2024 17:20:13 +0300 Subject: lib/stackdepot: off by one in depot_fetch_stack() The stack_pools[] array has DEPOT_MAX_POOLS. The "pools_num" tracks the number of pools which are initialized. See depot_init_pool() for more details. If pool_index == pools_num_cached, this will read one element beyond what we want. If not all the pools are initialized, then the pool will be NULL, triggering a WARN(), and if they are all initialized it will read one element beyond the end of the array. Link: https://lkml.kernel.org/r/361ac881-60b7-471f-91e5-5bf8fe8042b2@moroto.mountain Fixes: b29d31885814 ("lib/stackdepot: store free stack records in a freelist") Signed-off-by: Dan Carpenter Cc: Alexander Potapenko Cc: Andrey Konovalov Signed-off-by: Andrew Morton --- lib/stackdepot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8c795bb20afb..af6cc19a2003 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -447,7 +447,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) lockdep_assert_not_held(&pool_lock); - if (pool_index > pools_num_cached) { + if (pool_index >= pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", pool_index, pools_num_cached, handle); return NULL; -- cgit v1.2.3 From 44503b97ad9784943afb39d62fce3936580bec6f Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 26 Feb 2024 20:11:57 +0100 Subject: lib/test_vmalloc.c: fix typo in function name Fix a typo and change the function name to init_test_configuration. Both caller and definition have the same typo, so the current code already works. Link: https://lkml.kernel.org/r/20240226191159.39509-2-martin@kaiser.cx Signed-off-by: Martin Kaiser Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 3718d9886407..191b6bd5dff9 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -501,7 +501,7 @@ static int test_func(void *private) } static int -init_test_configurtion(void) +init_test_configuration(void) { /* * A maximum number of workers is defined as hard-coded @@ -531,7 +531,7 @@ static void do_concurrent_test(void) /* * Set some basic configurations plus sanity check. 
*/ - ret = init_test_configurtion(); + ret = init_test_configuration(); if (ret < 0) return; -- cgit v1.2.3 From e2c5bfebabaedbec8ca858b66f95f3a993428b0c Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 26 Feb 2024 20:11:58 +0100 Subject: lib/test_vmalloc.c: drop empty exit function The module is never loaded successfully. Therefore, it'll never be unloaded and we can remove the exit function. Link: https://lkml.kernel.org/r/20240226191159.39509-3-martin@kaiser.cx Signed-off-by: Martin Kaiser Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 191b6bd5dff9..d0c0cbe1913d 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -600,12 +600,7 @@ static int vmalloc_test_init(void) return -EAGAIN; /* Fail will directly unload the module */ } -static void vmalloc_test_exit(void) -{ -} - module_init(vmalloc_test_init) -module_exit(vmalloc_test_exit) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Uladzislau Rezki"); -- cgit v1.2.3 From 4c4a52544ae03d84ffd3b5e9593833ffe05485a1 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 26 Feb 2024 20:11:59 +0100 Subject: lib/test_vmalloc.c: use unsigned long constant Use an unsigned long constant instead of an int constant and a cast. This fixes the checkpatch warning WARNING: Unnecessary typecast of c90 int constant - '(unsigned long) 1' could be '1UL' + align = ((unsigned long) 1) << i; Link: https://lkml.kernel.org/r/20240226191159.39509-4-martin@kaiser.cx Signed-off-by: Martin Kaiser Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index d0c0cbe1913d..4ddf769861ff 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -117,7 +117,7 @@ static int align_shift_alloc_test(void) int i; for (i = 0; i < BITS_PER_LONG; i++) { - align = ((unsigned long) 1) << i; + align = 1UL << i; ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, __builtin_return_address(0)); -- cgit v1.2.3
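
A note on the two stackdepot handle fixes above (the 0-handle fix and the depot_fetch_stack() off-by-one): both hinge on the same detail, namely that pool_index is stored in the handle bitfield offset by 1, so the decode side must subtract 1 and bounds-check with '>='. The following is a minimal, self-contained userspace sketch of that encode/decode scheme, not kernel code; the field widths here are simplified stand-ins for the real DEPOT_POOL_INDEX_BITS and DEPOT_OFFSET_BITS, which the kernel derives from DEPOT_POOL_ORDER, PAGE_SHIFT and STACK_DEPOT_EXTRA_BITS.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define POOL_INDEX_BITS 16	/* simplified stand-in, not the kernel's value */
#define OFFSET_BITS 10		/* simplified stand-in, not the kernel's value */

union handle_parts {
	uint32_t handle;
	struct {
		uint32_t pool_index : POOL_INDEX_BITS;	/* stored offset by 1 */
		uint32_t offset : OFFSET_BITS;
		uint32_t extra : 32 - POOL_INDEX_BITS - OFFSET_BITS;
	};
};

/* Encode: store pool_index + 1 so record 0 of pool 0 does not yield handle 0. */
static uint32_t encode(uint32_t pool_index, uint32_t offset)
{
	union handle_parts parts = { .handle = 0 };

	parts.pool_index = pool_index + 1;
	parts.offset = offset;
	return parts.handle;
}

/* Decode: undo the +1, then reject out-of-range indices with '>=', not '>'. */
static int decode(uint32_t handle, uint32_t pools_num,
		  uint32_t *pool_index, uint32_t *offset)
{
	union handle_parts parts = { .handle = handle };
	uint32_t idx = parts.pool_index - 1;	/* a stored 0 wraps to UINT32_MAX... */

	if (idx >= pools_num)			/* ...and is rejected here */
		return -1;
	*pool_index = idx;
	*offset = parts.offset;
	return 0;
}

int main(void)
{
	uint32_t idx, off;

	/* The very first record now gets a non-zero handle. */
	assert(encode(0, 0) != 0);
	/* A raw 0 handle can never decode to a valid record. */
	assert(decode(0, 4, &idx, &off) == -1);
	/* In-bounds handles round-trip back to the original pair. */
	assert(decode(encode(2, 7), 4, &idx, &off) == 0 && idx == 2 && off == 7);
	/* pool_index == pools_num is out of bounds: the '>=' off-by-one fix. */
	assert(decode(encode(4, 0), 4, &idx, &off) == -1);

	printf("handle model OK\n");
	return 0;
}

With the +1 applied at encode time, a handle of 0 can never name a valid record, which is the invariant the first fix establishes; valid decoded indices then run from 0 to pools_num - 1, which is why the bounds check needs '>=' rather than '>', as the second fix makes explicit.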