diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/bpf_lsm.c | 1 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 4 | ||||
-rw-r--r-- | kernel/bpf/memalloc.c | 2 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 25 | ||||
-rw-r--r-- | kernel/cgroup/cpuset.c | 3 | ||||
-rw-r--r-- | kernel/events/core.c | 39 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 4 | ||||
-rw-r--r-- | kernel/irq/msi.c | 6 | ||||
-rw-r--r-- | kernel/module/main.c | 26 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 8 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 3 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 23 | ||||
-rw-r--r-- | kernel/trace/rv/rv.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.h | 1 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_events_hist.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_osnoise.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 3 |
19 files changed, 109 insertions, 58 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index a4a41ee3e80b..e14c822f8911 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -51,7 +51,6 @@ BTF_SET_END(bpf_lsm_current_hooks) */ BTF_SET_START(bpf_lsm_locked_sockopt_hooks) #ifdef CONFIG_SECURITY_NETWORK -BTF_ID(func, bpf_lsm_socket_sock_rcv_skb) BTF_ID(func, bpf_lsm_sock_graft) BTF_ID(func, bpf_lsm_inet_csk_clone) BTF_ID(func, bpf_lsm_inet_conn_established) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f7dd8af06413..b7017cae6fd1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); - return 0; end: - btf_free_dtor_kfunc_tab(btf); + if (ret) + btf_free_dtor_kfunc_tab(btf); btf_put(btf); return ret; } diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index ebcc3dd0fa19..1db156405b68 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -71,7 +71,7 @@ static int bpf_mem_cache_idx(size_t size) if (size <= 192) return size_index[(size - 1) / 8] - 1; - return fls(size - 1) - 1; + return fls(size - 1) - 2; } #define NUM_CACHES 11 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbef0b0967ae..7ee218827259 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3243,13 +3243,24 @@ static bool __is_pointer_value(bool allow_ptr_leaks, return reg->type != SCALAR_VALUE; } +/* Copy src state preserving dst->parent and dst->live fields */ +static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) +{ + struct bpf_reg_state *parent = dst->parent; + enum bpf_reg_liveness live = dst->live; + + *dst = *src; + dst->parent = parent; + dst->live = live; +} + static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg, int size) { int i; - state->stack[spi].spilled_ptr = *reg; + copy_register_state(&state->stack[spi].spilled_ptr, reg); if (size == BPF_REG_SIZE) state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; @@ -3577,7 +3588,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, */ s32 subreg_def = state->regs[dst_regno].subreg_def; - state->regs[dst_regno] = *reg; + copy_register_state(&state->regs[dst_regno], reg); state->regs[dst_regno].subreg_def = subreg_def; } else { for (i = 0; i < size; i++) { @@ -3598,7 +3609,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno >= 0) { /* restore register state from stack */ - state->regs[dst_regno] = *reg; + copy_register_state(&state->regs[dst_regno], reg); /* mark reg as written since spilled pointer state likely * has its liveness marks cleared by is_state_visited() * which resets stack/reg liveness for state transitions @@ -9592,7 +9603,7 @@ do_sim: */ if (!ptr_is_dst_reg) { tmp = *dst_reg; - *dst_reg = *ptr_reg; + copy_register_state(dst_reg, ptr_reg); } ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx); @@ -10845,7 +10856,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * to propagate min/max range. */ src_reg->id = ++env->id_gen; - *dst_reg = *src_reg; + copy_register_state(dst_reg, src_reg); dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = DEF_NOT_SUBREG; } else { @@ -10856,7 +10867,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } else if (src_reg->type == SCALAR_VALUE) { - *dst_reg = *src_reg; + copy_register_state(dst_reg, src_reg); /* Make sure ID is cleared otherwise * dst_reg min/max could be incorrectly * propagated into src_reg by find_equal_scalars() @@ -11655,7 +11666,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, bpf_for_each_reg_in_vstate(vstate, state, reg, ({ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; + copy_register_state(reg, known_reg); })); } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a29c0b13706b..205dc9edcaa9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1346,7 +1346,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * A parent can be left with no CPU as long as there is no * task directly associated with the parent partition. */ - if (!cpumask_intersects(cs->cpus_allowed, parent->effective_cpus) && + if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) && partition_is_populated(parent, cs)) return PERR_NOCPUS; @@ -2324,6 +2324,7 @@ out: new_prs = -new_prs; spin_lock_irq(&callback_lock); cs->partition_root_state = new_prs; + WRITE_ONCE(cs->prs_err, err); spin_unlock_irq(&callback_lock); /* * Update child cpusets, if present. diff --git a/kernel/events/core.c b/kernel/events/core.c index d56328e5080e..c4be13e50547 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4813,19 +4813,17 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); epc = &cpc->epc; - + raw_spin_lock_irq(&ctx->lock); if (!epc->ctx) { atomic_set(&epc->refcount, 1); epc->embedded = 1; - raw_spin_lock_irq(&ctx->lock); list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); epc->ctx = ctx; - raw_spin_unlock_irq(&ctx->lock); } else { WARN_ON_ONCE(epc->ctx != ctx); atomic_inc(&epc->refcount); } - + raw_spin_unlock_irq(&ctx->lock); return epc; } @@ -4896,33 +4894,30 @@ static void free_epc_rcu(struct rcu_head *head) static void put_pmu_ctx(struct perf_event_pmu_context *epc) { + struct perf_event_context *ctx = epc->ctx; unsigned long flags; - if (!atomic_dec_and_test(&epc->refcount)) + /* + * XXX + * + * lockdep_assert_held(&ctx->mutex); + * + * can't because of the call-site in _free_event()/put_event() + * which isn't always called under ctx->mutex. + */ + if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags)) return; - if (epc->ctx) { - struct perf_event_context *ctx = epc->ctx; + WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); - /* - * XXX - * - * lockdep_assert_held(&ctx->mutex); - * - * can't because of the call-site in _free_event()/put_event() - * which isn't always called under ctx->mutex. - */ - - WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); - raw_spin_lock_irqsave(&ctx->lock, flags); - list_del_init(&epc->pmu_ctx_entry); - epc->ctx = NULL; - raw_spin_unlock_irqrestore(&ctx->lock, flags); - } + list_del_init(&epc->pmu_ctx_entry); + epc->ctx = NULL; WARN_ON_ONCE(!list_empty(&epc->pinned_active)); WARN_ON_ONCE(!list_empty(&epc->flexible_active)); + raw_spin_unlock_irqrestore(&ctx->lock, flags); + if (epc->embedded) return; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8fe1da9614ee..798a9042421f 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -114,7 +114,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; - if (WARN_ON(!is_fwnode_irqchip(fwnode))) + if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); @@ -1915,7 +1915,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d) static void debugfs_remove_domain_dir(struct irq_domain *d) { - debugfs_remove(debugfs_lookup(d->name, domain_dir)); + debugfs_lookup_and_remove(d->name, domain_dir); } void __init irq_domain_debugfs_init(struct dentry *root) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 955267bbc2be..783a3e6a0b10 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1000,7 +1000,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, fail: msi_unlock_descs(dev); free_fwnode: - kfree(fwnode); + irq_domain_free_fwnode(fwnode); free_bundle: kfree(bundle); return false; @@ -1013,6 +1013,7 @@ free_bundle: */ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) { + struct fwnode_handle *fwnode = NULL; struct msi_domain_info *info; struct irq_domain *domain; @@ -1025,7 +1026,10 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) dev->msi.data->__domains[domid].domain = NULL; info = domain->host_data; + if (irq_domain_is_msi_device(domain)) + fwnode = domain->fwnode; irq_domain_remove(domain); + irq_domain_free_fwnode(fwnode); kfree(container_of(info, struct msi_domain_template, info)); unlock: diff --git a/kernel/module/main.c b/kernel/module/main.c index 48568a0f5651..4ac3fe43e6c8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2393,7 +2393,8 @@ static bool finished_loading(const char *name) sched_annotate_sleep(); mutex_lock(&module_mutex); mod = find_module_all(name, strlen(name), true); - ret = !mod || mod->state == MODULE_STATE_LIVE; + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; @@ -2569,20 +2570,35 @@ static int add_unformed_module(struct module *mod) mod->state = MODULE_STATE_UNFORMED; -again: mutex_lock(&module_mutex); old = find_module_all(mod->name, strlen(mod->name), true); if (old != NULL) { - if (old->state != MODULE_STATE_LIVE) { + if (old->state == MODULE_STATE_COMING + || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ mutex_unlock(&module_mutex); err = wait_event_interruptible(module_wq, finished_loading(mod->name)); if (err) goto out_unlocked; - goto again; + + /* The module might have gone in the meantime. */ + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), + true); } - err = -EEXIST; + + /* + * We are here only when the same module was being loaded. Do + * not try to load it again right now. It prevents long delays + * caused by serialized module load failures. It might happen + * when more devices of the same type trigger load of + * a particular module. + */ + if (old && old->state == MODULE_STATE_LIVE) + err = -EEXIST; + else + err = -EBUSY; goto out; } mod_update_bounds(mod); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 197545241ab8..d7043043f59c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -933,8 +933,8 @@ config RING_BUFFER_RECORD_RECURSION default y help The ring buffer has its own internal recursion. Although when - recursion happens it wont cause harm because of the protection, - but it does cause an unwanted overhead. Enabling this option will + recursion happens it won't cause harm because of the protection, + but it does cause unwanted overhead. Enabling this option will place where recursion was detected into the ftrace "recursed_functions" file. @@ -1017,8 +1017,8 @@ config RING_BUFFER_STARTUP_TEST The test runs for 10 seconds. This will slow your boot time by at least 10 more seconds. - At the end of the test, statics and more checks are done. - It will output the stats of each per cpu buffer. What + At the end of the test, statistics and more checks are done. + It will output the stats of each per cpu buffer: What was written, the sizes, what was read, what was lost, and other similar details. diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f47274de012b..c09792c551bf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -833,6 +833,7 @@ static void do_bpf_send_signal(struct irq_work *entry) work = container_of(entry, struct send_signal_irq_work, irq_work); group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); + put_task_struct(work->task); } static int bpf_send_signal_common(u32 sig, enum pid_type type) @@ -867,7 +868,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) * to the irq_work. The current task may change when queued * irq works get executed. */ - work->task = current; + work->task = get_task_struct(current); work->sig = sig; work->type = type; irq_work_queue(&work->irq_work); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 442438b93fe9..750aa3f08b25 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1248,12 +1248,17 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } +/** + * ftrace_free_filter - remove all filters for an ftrace_ops + * @ops - the ops to remove the filters from + */ void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); } +EXPORT_SYMBOL_GPL(ftrace_free_filter); static struct ftrace_hash *alloc_ftrace_hash(int size_bits) { @@ -5839,6 +5844,10 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); * * Filters denote which functions should be enabled when tracing is enabled * If @ip is NULL, it fails to update filter. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset) @@ -5858,7 +5867,11 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); * * Filters denote which functions should be enabled when tracing is enabled * If @ips array or any ip specified within is NULL , it fails to update filter. - */ + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). +*/ int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt, int remove, int reset) { @@ -5900,6 +5913,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, * * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) @@ -5919,6 +5936,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); * Notrace Filters denote which functions should not be enabled when tracing * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 6c97cc2d754a..7e9061828c24 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -516,7 +516,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user struct rv_monitor_def *mdef; int retval = -EINVAL; bool enable = true; - char *ptr = buff; + char *ptr; int len; if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a555a861b978..78ed5f1baa8c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10295,6 +10295,8 @@ void __init early_trace_init(void) static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); + + init_events(); } void __init trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e46a49269be2..4eb6d6b97a9f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1490,6 +1490,7 @@ extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); +extern int init_events(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 96acc2b71ac7..e095c3b3a50d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -128,7 +128,7 @@ static bool is_not(const char *str) } /** - * prog_entry - a singe entry in the filter program + * struct prog_entry - a singe entry in the filter program * @target: Index to jump to on a branch (actually one minus the index) * @when_to_branch: The value of the result of the predicate to do a branch * @pred: The predicate to execute. @@ -140,16 +140,16 @@ struct prog_entry { }; /** - * update_preds- assign a program entry a label target + * update_preds - assign a program entry a label target * @prog: The program array * @N: The index of the current entry in @prog - * @when_to_branch: What to assign a program entry for its branch condition + * @invert: What to assign a program entry for its branch condition * * The program entry at @N has a target that points to the index of a program * entry that can have its target and when_to_branch fields updated. * Update the current program entry denoted by index @N target field to be * that of the updated entry. This will denote the entry to update if - * we are processing an "||" after an "&&" + * we are processing an "||" after an "&&". */ static void update_preds(struct prog_entry *prog, int N, int invert) { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index fcaf226b7744..5edbf6b1da3f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1988,6 +1988,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, hist_field->fn_num = flags & HIST_FIELD_FL_LOG2 ? HIST_FIELD_FN_LOG2 : HIST_FIELD_FN_BUCKET; hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + if (!hist_field->operands[0]) + goto free; hist_field->size = hist_field->operands[0]->size; hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL); if (!hist_field->type) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 94c1b5eb1dc0..210e1f168392 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -147,9 +147,8 @@ static void osnoise_unregister_instance(struct trace_array *tr) * register/unregister serialization is provided by trace's * trace_types_lock. */ - lockdep_assert_held(&trace_types_lock); - - list_for_each_entry_rcu(inst, &osnoise_instances, list) { + list_for_each_entry_rcu(inst, &osnoise_instances, list, + lockdep_is_held(&trace_types_lock)) { if (inst->tr == tr) { list_del_rcu(&inst->list); found = 1; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 57a13b61f186..bd475a00f96d 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1535,7 +1535,7 @@ static struct trace_event *events[] __initdata = { NULL }; -__init static int init_events(void) +__init int init_events(void) { struct trace_event *event; int i, ret; @@ -1548,4 +1548,3 @@ __init static int init_events(void) return 0; } -early_initcall(init_events); |