summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-02-19 07:51:35 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-02-19 07:51:35 +0100
commit07749061b837a1268146dc8a620a522253cea877 (patch)
tree3708ac3f1a088869daf7e9826bc4b56abfcdac74 /lib
parent5c0941c55e5f681ffb05f395222ac673460bb3d0 (diff)
parentb401b621758e46812da61fa58a67c3fd8d91de0d (diff)
downloadlinux-stable-07749061b837a1268146dc8a620a522253cea877.tar.gz
linux-stable-07749061b837a1268146dc8a620a522253cea877.tar.bz2
linux-stable-07749061b837a1268146dc8a620a522253cea877.zip
Merge 6.8-rc5 into driver-core-next
We need the driver core changes in here as well. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/kobject.c24
-rw-r--r--lib/kunit/device-impl.h2
-rw-r--r--lib/kunit/device.c18
-rw-r--r--lib/kunit/executor.c4
-rw-r--r--lib/kunit/kunit-test.c2
-rw-r--r--lib/kunit/test.c17
-rw-r--r--lib/seq_buf.c49
-rw-r--r--lib/stackdepot.c373
8 files changed, 344 insertions, 145 deletions
diff --git a/lib/kobject.c b/lib/kobject.c
index 59dbcbdb1c91..72fa20f405f1 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj)
if (error)
return error;
- error = sysfs_create_groups(kobj, ktype->default_groups);
- if (error) {
- sysfs_remove_dir(kobj);
- return error;
+ if (ktype) {
+ error = sysfs_create_groups(kobj, ktype->default_groups);
+ if (error) {
+ sysfs_remove_dir(kobj);
+ return error;
+ }
}
/*
@@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj)
sd = kobj->sd;
ktype = get_ktype(kobj);
- sysfs_remove_groups(kobj, ktype->default_groups);
+ if (ktype)
+ sysfs_remove_groups(kobj, ktype->default_groups);
/* send "remove" if the caller did not do it but sent "add" */
if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
@@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj)
pr_debug("'%s' (%p): %s, parent %p\n",
kobject_name(kobj), kobj, __func__, kobj->parent);
+ if (t && !t->release)
+ pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
+ kobject_name(kobj), kobj);
+
/* remove from sysfs if the caller did not do it */
if (kobj->state_in_sysfs) {
pr_debug("'%s' (%p): auto cleanup kobject_del\n",
@@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj)
parent = NULL;
}
- if (t->release) {
+ if (t && t->release) {
pr_debug("'%s' (%p): calling ktype release\n",
kobject_name(kobj), kobj);
t->release(kobj);
- } else {
- pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
- kobject_name(kobj), kobj);
}
/* free name if we allocated it */
@@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa
{
const struct kobj_ns_type_operations *ops = NULL;
- if (parent && parent->ktype->child_ns_type)
+ if (parent && parent->ktype && parent->ktype->child_ns_type)
ops = parent->ktype->child_ns_type(parent);
return ops;
diff --git a/lib/kunit/device-impl.h b/lib/kunit/device-impl.h
index 54bd55836405..5fcd48ff0f36 100644
--- a/lib/kunit/device-impl.h
+++ b/lib/kunit/device-impl.h
@@ -13,5 +13,7 @@
// For internal use only -- registers the kunit_bus.
int kunit_bus_init(void);
+// For internal use only -- unregisters the kunit_bus.
+void kunit_bus_shutdown(void);
#endif //_KUNIT_DEVICE_IMPL_H
diff --git a/lib/kunit/device.c b/lib/kunit/device.c
index f5371287b375..644a38a1f5b1 100644
--- a/lib/kunit/device.c
+++ b/lib/kunit/device.c
@@ -45,8 +45,8 @@ int kunit_bus_init(void)
int error;
kunit_bus_device = root_device_register("kunit");
- if (!kunit_bus_device)
- return -ENOMEM;
+ if (IS_ERR(kunit_bus_device))
+ return PTR_ERR(kunit_bus_device);
error = bus_register(&kunit_bus_type);
if (error)
@@ -54,6 +54,20 @@ int kunit_bus_init(void)
return error;
}
+/* Unregister the 'kunit_bus' in case the KUnit module is unloaded. */
+void kunit_bus_shutdown(void)
+{
+ /* Make sure the bus exists before we unregister it. */
+ if (IS_ERR_OR_NULL(kunit_bus_device))
+ return;
+
+ bus_unregister(&kunit_bus_type);
+
+ root_device_unregister(kunit_bus_device);
+
+ kunit_bus_device = NULL;
+}
+
/* Release a 'fake' KUnit device. */
static void kunit_device_release(struct device *d)
{
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 717b9599036b..689fff2b2b10 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set)
kfree(suite_set.start);
}
+/*
+ * Filter and reallocate test suites. Must return the filtered test suites set
+ * allocated at a valid virtual address or NULL in case of error.
+ */
struct kunit_suite_set
kunit_filter_suites(const struct kunit_suite_set *suite_set,
const char *filter_glob,
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index c4259d910356..f7980ef236a3 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test)
long action_was_run = 0;
test_device = kunit_device_register(test, "my_device");
- KUNIT_ASSERT_NOT_NULL(test, test_device);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device);
/* Add an action to verify cleanup. */
devm_add_action(test_device, test_dev_action, &action_was_run);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index f95d2093a0aa..1d1475578515 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -17,6 +17,7 @@
#include <linux/panic.h>
#include <linux/sched/debug.h>
#include <linux/sched.h>
+#include <linux/mm.h>
#include "debugfs.h"
#include "device-impl.h"
@@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod)
};
const char *action = kunit_action();
+ /*
+ * Check if the start address is a valid virtual address to detect
+ * if the module load sequence has failed and the suite set has not
+ * been initialized and filtered.
+ */
+ if (!suite_set.start || !virt_addr_valid(suite_set.start))
+ return;
+
if (!action)
__kunit_test_suites_exit(mod->kunit_suites,
mod->num_kunit_suites);
- if (suite_set.start)
- kunit_free_suite_set(suite_set);
+ kunit_free_suite_set(suite_set);
}
static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
@@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
switch (val) {
case MODULE_STATE_LIVE:
+ kunit_module_init(mod);
break;
case MODULE_STATE_GOING:
kunit_module_exit(mod);
break;
case MODULE_STATE_COMING:
- kunit_module_init(mod);
break;
case MODULE_STATE_UNFORMED:
break;
@@ -920,6 +928,9 @@ static void __exit kunit_exit(void)
#ifdef CONFIG_MODULES
unregister_module_notifier(&kunit_mod_nb);
#endif
+
+ kunit_bus_shutdown();
+
kunit_debugfs_cleanup();
}
module_exit(kunit_exit);
diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 010c730ca7fc..f3f3436d60a9 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -13,16 +13,26 @@
* seq_buf_init() more than once to reset the seq_buf to start
* from scratch.
*/
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/hex.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
/**
* seq_buf_can_fit - can the new data fit in the current buffer?
* @s: the seq_buf descriptor
* @len: The length to see if it can fit in the current buffer
*
- * Returns true if there's enough unused space in the seq_buf buffer
+ * Returns: true if there's enough unused space in the seq_buf buffer
* to fit the amount of new data according to @len.
*/
static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
@@ -35,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
* @m: the seq_file descriptor that is the destination
* @s: the seq_buf descriptor that is the source.
*
- * Returns zero on success, non zero otherwise
+ * Returns: zero on success, non-zero otherwise.
*/
int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
{
@@ -50,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
* @fmt: printf format string
* @args: va_list of arguments from a printf() type function
*
- * Writes a vnprintf() format into the sequencce buffer.
+ * Writes a vnprintf() format into the sequence buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
{
@@ -78,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
*
* Writes a printf() format into the sequence buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
{
@@ -94,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
EXPORT_SYMBOL_GPL(seq_buf_printf);
/**
- * seq_buf_do_printk - printk seq_buf line by line
+ * seq_buf_do_printk - printk() seq_buf line by line
* @s: seq_buf descriptor
* @lvl: printk level
*
* printk()-s a multi-line sequential buffer line by line. The function
- * makes sure that the buffer in @s is nul terminated and safe to read
+ * makes sure that the buffer in @s is NUL-terminated and safe to read
* as a string.
*/
void seq_buf_do_printk(struct seq_buf *s, const char *lvl)
@@ -139,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk);
* This function will take the format and the binary array and finish
* the conversion into the ASCII string within the buffer.
*
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
{
@@ -167,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
*
* Copy a simple string into the sequence buffer.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_puts(struct seq_buf *s, const char *str)
{
@@ -196,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts);
*
* Copy a single character into the sequence buffer.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putc(struct seq_buf *s, unsigned char c)
{
@@ -212,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c)
EXPORT_SYMBOL_GPL(seq_buf_putc);
/**
- * seq_buf_putmem - write raw data into the sequenc buffer
+ * seq_buf_putmem - write raw data into the sequence buffer
* @s: seq_buf descriptor
* @mem: The raw memory to copy into the buffer
* @len: The length of the raw memory to copy (in bytes)
@@ -221,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc);
* buffer and a strcpy() would not work. Using this function allows
* for such cases.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
{
@@ -249,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
* raw memory into the buffer it writes its ASCII representation of it
* in hex characters.
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
unsigned int len)
@@ -297,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
*
* Write a path name into the sequence buffer.
*
- * Returns the number of written bytes on success, -1 on overflow
+ * Returns: the number of written bytes on success, -1 on overflow.
*/
int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
{
@@ -332,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
* or until it reaches the end of the content in the buffer (@s->len),
* whichever comes first.
*
+ * Returns:
* On success, it returns a positive number of the number of bytes
* it copied.
*
@@ -382,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt)
* linebuf size is maximal length for one line.
* 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for
* separating space
- * 2 - spaces separating hex dump and ascii representation
- * 32 - ascii representation
+ * 2 - spaces separating hex dump and ASCII representation
+ * 32 - ASCII representation
* 1 - terminating '\0'
*
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
*/
int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type,
int rowsize, int groupsize,
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index a0be5d05c7f0..5caa1f566553 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) "stackdepot: " fmt
+#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
@@ -21,8 +22,9 @@
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
-#include <linux/percpu.h>
#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -67,12 +69,28 @@ union handle_parts {
};
struct stack_record {
- struct list_head list; /* Links in hash table or freelist */
+ struct list_head hash_list; /* Links in the hash table */
u32 hash; /* Hash in hash table */
u32 size; /* Number of stored frames */
- union handle_parts handle;
+ union handle_parts handle; /* Constant after initialization */
refcount_t count;
- unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */
+ union {
+ unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */
+ struct {
+ /*
+ * An important invariant of the implementation is to
+ * only place a stack record onto the freelist iff its
+ * refcount is zero. Because stack records with a zero
+ * refcount are never considered as valid, it is safe to
+ * union @entries and freelist management state below.
+ * Conversely, as soon as an entry is off the freelist
+ * and its refcount becomes non-zero, the below must not
+ * be accessed until being placed back on the freelist.
+ */
+ struct list_head free_list; /* Links in the freelist */
+ unsigned long rcu_state; /* RCU cookie */
+ };
+ };
};
#define DEPOT_STACK_RECORD_SIZE \
@@ -112,8 +130,25 @@ static LIST_HEAD(free_stacks);
* yet allocated or if the limit on the number of pools is reached.
*/
static bool new_pool_required = true;
-/* Lock that protects the variables above. */
-static DEFINE_RWLOCK(pool_rwlock);
+/* The lock must be held when performing pool or freelist modifications. */
+static DEFINE_RAW_SPINLOCK(pool_lock);
+
+/* Statistics counters for debugfs. */
+enum depot_counter_id {
+ DEPOT_COUNTER_ALLOCS,
+ DEPOT_COUNTER_FREES,
+ DEPOT_COUNTER_INUSE,
+ DEPOT_COUNTER_FREELIST_SIZE,
+ DEPOT_COUNTER_COUNT,
+};
+static long counters[DEPOT_COUNTER_COUNT];
+static const char *const counter_names[] = {
+ [DEPOT_COUNTER_ALLOCS] = "allocations",
+ [DEPOT_COUNTER_FREES] = "frees",
+ [DEPOT_COUNTER_INUSE] = "in_use",
+ [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
+};
+static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
static int __init disable_stack_depot(char *str)
{
@@ -258,14 +293,15 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(stack_depot_init);
-/* Initializes a stack depol pool. */
+/*
+ * Initializes new stack depot @pool, release all its entries to the freelist,
+ * and update the list of pools.
+ */
static void depot_init_pool(void *pool)
{
int offset;
- lockdep_assert_held_write(&pool_rwlock);
-
- WARN_ON(!list_empty(&free_stacks));
+ lockdep_assert_held(&pool_lock);
/* Initialize handles and link stack records into the freelist. */
for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
@@ -276,18 +312,36 @@ static void depot_init_pool(void *pool)
stack->handle.offset = offset >> DEPOT_STACK_ALIGN;
stack->handle.extra = 0;
- list_add(&stack->list, &free_stacks);
+ /*
+ * Stack traces of size 0 are never saved, and we can simply use
+ * the size field as an indicator if this is a new unused stack
+ * record in the freelist.
+ */
+ stack->size = 0;
+
+ INIT_LIST_HEAD(&stack->hash_list);
+ /*
+ * Add to the freelist front to prioritize never-used entries:
+ * required in case there are entries in the freelist, but their
+ * RCU cookie still belongs to the current RCU grace period
+ * (there can still be concurrent readers).
+ */
+ list_add(&stack->free_list, &free_stacks);
+ counters[DEPOT_COUNTER_FREELIST_SIZE]++;
}
/* Save reference to the pool to be used by depot_fetch_stack(). */
stack_pools[pools_num] = pool;
- pools_num++;
+
+ /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
+ WRITE_ONCE(pools_num, pools_num + 1);
+ ASSERT_EXCLUSIVE_WRITER(pools_num);
}
/* Keeps the preallocated memory to be used for a new stack depot pool. */
static void depot_keep_new_pool(void **prealloc)
{
- lockdep_assert_held_write(&pool_rwlock);
+ lockdep_assert_held(&pool_lock);
/*
* If a new pool is already saved or the maximum number of
@@ -310,17 +364,16 @@ static void depot_keep_new_pool(void **prealloc)
* number of pools is reached. In either case, take note that
* keeping another pool is not required.
*/
- new_pool_required = false;
+ WRITE_ONCE(new_pool_required, false);
}
-/* Updates references to the current and the next stack depot pools. */
-static bool depot_update_pools(void **prealloc)
+/*
+ * Try to initialize a new stack depot pool from either a previous or the
+ * current pre-allocation, and release all its entries to the freelist.
+ */
+static bool depot_try_init_pool(void **prealloc)
{
- lockdep_assert_held_write(&pool_rwlock);
-
- /* Check if we still have objects in the freelist. */
- if (!list_empty(&free_stacks))
- goto out_keep_prealloc;
+ lockdep_assert_held(&pool_lock);
/* Check if we have a new pool saved and use it. */
if (new_pool) {
@@ -329,10 +382,9 @@ static bool depot_update_pools(void **prealloc)
/* Take note that we might need a new new_pool. */
if (pools_num < DEPOT_MAX_POOLS)
- new_pool_required = true;
+ WRITE_ONCE(new_pool_required, true);
- /* Try keeping the preallocated memory for new_pool. */
- goto out_keep_prealloc;
+ return true;
}
/* Bail out if we reached the pool limit. */
@@ -349,12 +401,32 @@ static bool depot_update_pools(void **prealloc)
}
return false;
+}
+
+/* Try to find next free usable entry. */
+static struct stack_record *depot_pop_free(void)
+{
+ struct stack_record *stack;
+
+ lockdep_assert_held(&pool_lock);
+
+ if (list_empty(&free_stacks))
+ return NULL;
+
+ /*
+ * We maintain the invariant that the elements in front are least
+ * recently used, and are therefore more likely to be associated with an
+ * RCU grace period in the past. Consequently it is sufficient to only
+ * check the first entry.
+ */
+ stack = list_first_entry(&free_stacks, struct stack_record, free_list);
+ if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state))
+ return NULL;
+
+ list_del(&stack->free_list);
+ counters[DEPOT_COUNTER_FREELIST_SIZE]--;
-out_keep_prealloc:
- /* Keep the preallocated memory for a new pool if required. */
- if (*prealloc)
- depot_keep_new_pool(prealloc);
- return true;
+ return stack;
}
/* Allocates a new stack in a stack depot pool. */
@@ -363,19 +435,22 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
struct stack_record *stack;
- lockdep_assert_held_write(&pool_rwlock);
+ lockdep_assert_held(&pool_lock);
- /* Update current and new pools if required and possible. */
- if (!depot_update_pools(prealloc))
+ /* This should already be checked by public API entry points. */
+ if (WARN_ON_ONCE(!size))
return NULL;
/* Check if we have a stack record to save the stack trace. */
- if (list_empty(&free_stacks))
- return NULL;
-
- /* Get and unlink the first entry from the freelist. */
- stack = list_first_entry(&free_stacks, struct stack_record, list);
- list_del(&stack->list);
+ stack = depot_pop_free();
+ if (!stack) {
+ /* No usable entries on the freelist - try to refill the freelist. */
+ if (!depot_try_init_pool(prealloc))
+ return NULL;
+ stack = depot_pop_free();
+ if (WARN_ON(!stack))
+ return NULL;
+ }
/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
@@ -394,38 +469,80 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
*/
kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE);
+ counters[DEPOT_COUNTER_ALLOCS]++;
+ counters[DEPOT_COUNTER_INUSE]++;
return stack;
}
static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
{
+ const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
- lockdep_assert_held(&pool_rwlock);
+ lockdep_assert_not_held(&pool_lock);
- if (parts.pool_index > pools_num) {
+ if (parts.pool_index > pools_num_cached) {
WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
- parts.pool_index, pools_num, handle);
+ parts.pool_index, pools_num_cached, handle);
return NULL;
}
pool = stack_pools[parts.pool_index];
- if (!pool)
+ if (WARN_ON(!pool))
return NULL;
stack = pool + offset;
+ if (WARN_ON(!refcount_read(&stack->count)))
+ return NULL;
+
return stack;
}
/* Links stack into the freelist. */
static void depot_free_stack(struct stack_record *stack)
{
- lockdep_assert_held_write(&pool_rwlock);
+ unsigned long flags;
+
+ lockdep_assert_not_held(&pool_lock);
- list_add(&stack->list, &free_stacks);
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ printk_deferred_enter();
+
+ /*
+ * Remove the entry from the hash list. Concurrent list traversal may
+ * still observe the entry, but since the refcount is zero, this entry
+ * will no longer be considered as valid.
+ */
+ list_del_rcu(&stack->hash_list);
+
+ /*
+ * Due to being used from constrained contexts such as the allocators,
+ * NMI, or even RCU itself, stack depot cannot rely on primitives that
+ * would sleep (such as synchronize_rcu()) or recursively call into
+ * stack depot again (such as call_rcu()).
+ *
+ * Instead, get an RCU cookie, so that we can ensure this entry isn't
+ * moved onto another list until the next grace period, and concurrent
+ * RCU list traversal remains safe.
+ */
+ stack->rcu_state = get_state_synchronize_rcu();
+
+ /*
+ * Add the entry to the freelist tail, so that older entries are
+ * considered first - their RCU cookie is more likely to no longer be
+ * associated with the current grace period.
+ */
+ list_add_tail(&stack->free_list, &free_stacks);
+
+ counters[DEPOT_COUNTER_FREELIST_SIZE]++;
+ counters[DEPOT_COUNTER_FREES]++;
+ counters[DEPOT_COUNTER_INUSE]--;
+
+ printk_deferred_exit();
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
}
/* Calculates the hash for a stack. */
@@ -453,22 +570,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct list_head *bucket,
- unsigned long *entries, int size,
- u32 hash)
+ unsigned long *entries, int size,
+ u32 hash, depot_flags_t flags)
{
- struct list_head *pos;
- struct stack_record *found;
+ struct stack_record *stack, *ret = NULL;
+
+ /*
+ * Stack depot may be used from instrumentation that instruments RCU or
+ * tracing itself; use variant that does not call into RCU and cannot be
+ * traced.
+ *
+ * Note: Such use cases must take care when using refcounting to evict
+ * unused entries, because the stack record free-then-reuse code paths
+ * do call into RCU.
+ */
+ rcu_read_lock_sched_notrace();
- lockdep_assert_held(&pool_rwlock);
+ list_for_each_entry_rcu(stack, bucket, hash_list) {
+ if (stack->hash != hash || stack->size != size)
+ continue;
+
+ /*
+ * This may race with depot_free_stack() accessing the freelist
+ * management state unioned with @entries. The refcount is zero
+ * in that case and the below refcount_inc_not_zero() will fail.
+ */
+ if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
+ continue;
+
+ /*
+ * Try to increment refcount. If this succeeds, the stack record
+ * is valid and has not yet been freed.
+ *
+ * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
+ * to then call stack_depot_put() later, and we can assume that
+ * a stack record is never placed back on the freelist.
+ */
+ if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
+ continue;
- list_for_each(pos, bucket) {
- found = list_entry(pos, struct stack_record, list);
- if (found->hash == hash &&
- found->size == size &&
- !stackdepot_memcmp(entries, found->entries, size))
- return found;
+ ret = stack;
+ break;
}
- return NULL;
+
+ rcu_read_unlock_sched_notrace();
+
+ return ret;
}
depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
@@ -482,7 +629,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
struct page *page = NULL;
void *prealloc = NULL;
bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
- bool need_alloc = false;
unsigned long flags;
u32 hash;
@@ -505,31 +651,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
hash = hash_stack(entries, nr_entries);
bucket = &stack_table[hash & stack_hash_mask];
- read_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
- /* Fast path: look the stack trace up without full locking. */
- found = find_stack(bucket, entries, nr_entries, hash);
- if (found) {
- if (depot_flags & STACK_DEPOT_FLAG_GET)
- refcount_inc(&found->count);
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
+ /* Fast path: look the stack trace up without locking. */
+ found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
+ if (found)
goto exit;
- }
-
- /* Take note if another stack pool needs to be allocated. */
- if (new_pool_required)
- need_alloc = true;
-
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
/*
* Allocate memory for a new pool if required now:
* we won't be able to do that under the lock.
*/
- if (unlikely(can_alloc && need_alloc)) {
+ if (unlikely(can_alloc && READ_ONCE(new_pool_required))) {
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
@@ -543,31 +674,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
prealloc = page_address(page);
}
- write_lock_irqsave(&pool_rwlock, flags);
+ raw_spin_lock_irqsave(&pool_lock, flags);
printk_deferred_enter();
- found = find_stack(bucket, entries, nr_entries, hash);
+ /* Try to find again, to avoid concurrently inserting duplicates. */
+ found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (!found) {
struct stack_record *new =
depot_alloc_stack(entries, nr_entries, hash, &prealloc);
if (new) {
- list_add(&new->list, bucket);
+ /*
+ * This releases the stack record into the bucket and
+ * makes it visible to readers in find_stack().
+ */
+ list_add_rcu(&new->hash_list, bucket);
found = new;
}
- } else {
- if (depot_flags & STACK_DEPOT_FLAG_GET)
- refcount_inc(&found->count);
+ }
+
+ if (prealloc) {
/*
- * Stack depot already contains this stack trace, but let's
- * keep the preallocated memory for future.
+ * Either stack depot already contains this stack trace, or
+ * depot_alloc_stack() did not consume the preallocated memory.
+ * Try to keep the preallocated memory for future.
*/
- if (prealloc)
- depot_keep_new_pool(&prealloc);
+ depot_keep_new_pool(&prealloc);
}
printk_deferred_exit();
- write_unlock_irqrestore(&pool_rwlock, flags);
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
if (prealloc) {
/* Stack depot didn't use this memory, free it. */
@@ -592,7 +728,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned long **entries)
{
struct stack_record *stack;
- unsigned long flags;
*entries = NULL;
/*
@@ -604,13 +739,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
if (!handle || stack_depot_disabled)
return 0;
- read_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
stack = depot_fetch_stack(handle);
-
- printk_deferred_exit();
- read_unlock_irqrestore(&pool_rwlock, flags);
+ /*
+ * Should never be NULL, otherwise this is a use-after-put (or just a
+ * corrupt handle).
+ */
+ if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
+ return 0;
*entries = stack->entries;
return stack->size;
@@ -620,29 +755,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
void stack_depot_put(depot_stack_handle_t handle)
{
struct stack_record *stack;
- unsigned long flags;
if (!handle || stack_depot_disabled)
return;
- write_lock_irqsave(&pool_rwlock, flags);
- printk_deferred_enter();
-
stack = depot_fetch_stack(handle);
- if (WARN_ON(!stack))
- goto out;
-
- if (refcount_dec_and_test(&stack->count)) {
- /* Unlink stack from the hash table. */
- list_del(&stack->list);
+ /*
+ * Should always be able to find the stack record, otherwise this is an
+ * unbalanced put attempt (or corrupt handle).
+ */
+ if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
+ return;
- /* Free stack. */
+ if (refcount_dec_and_test(&stack->count))
depot_free_stack(stack);
- }
-
-out:
- printk_deferred_exit();
- write_unlock_irqrestore(&pool_rwlock, flags);
}
EXPORT_SYMBOL_GPL(stack_depot_put);
@@ -690,3 +816,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+ /*
+ * data race ok: These are just statistics counters, and approximate
+ * statistics are ok for debugging.
+ */
+ seq_printf(seq, "pools: %d\n", data_race(pools_num));
+ for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
+ seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+static int depot_debugfs_init(void)
+{
+ struct dentry *dir;
+
+ if (stack_depot_disabled)
+ return 0;
+
+ dir = debugfs_create_dir("stackdepot", NULL);
+ debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
+ return 0;
+}
+late_initcall(depot_debugfs_init);