summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c68
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/crash_core.c1
-rw-r--r--kernel/entry/common.c5
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/irq_work.c3
-rw-r--r--kernel/printk/printk_safe.c2
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/debug.c3
-rw-r--r--kernel/sched/fair.c28
-rw-r--r--kernel/sched/pelt.h11
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/trace/ftrace.c8
-rw-r--r--kernel/trace/trace.c13
-rw-r--r--kernel/trace/trace_clock.c6
15 files changed, 104 insertions, 52 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 94ba5163d4c5..c6a27574242d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6483,6 +6483,27 @@ struct bpf_sanitize_info {
bool mask_to_left;
};
+static struct bpf_verifier_state *
+sanitize_speculative_path(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ u32 next_idx, u32 curr_idx)
+{
+ struct bpf_verifier_state *branch;
+ struct bpf_reg_state *regs;
+
+ branch = push_stack(env, next_idx, curr_idx, true);
+ if (branch && insn) {
+ regs = branch->frame[branch->curframe]->regs;
+ if (BPF_SRC(insn->code) == BPF_K) {
+ mark_reg_unknown(env, regs, insn->dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ mark_reg_unknown(env, regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->src_reg);
+ }
+ }
+ return branch;
+}
+
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn,
const struct bpf_reg_state *ptr_reg,
@@ -6566,12 +6587,26 @@ do_sim:
tmp = *dst_reg;
*dst_reg = *ptr_reg;
}
- ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+ env->insn_idx);
if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
return !ret ? REASON_STACK : 0;
}
+static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+
+ /* If we simulate paths under speculation, we don't update the
+ * insn as 'seen' such that when we verify unreachable paths in
+ * the non-speculative domain, sanitize_dead_code() can still
+ * rewrite/sanitize them.
+ */
+ if (!vstate->speculative)
+ env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+}
+
static int sanitize_err(struct bpf_verifier_env *env,
const struct bpf_insn *insn, int reason,
const struct bpf_reg_state *off_reg,
@@ -8750,14 +8785,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (err)
return err;
}
+
if (pred == 1) {
- /* only follow the goto, ignore fall-through */
+ /* Only follow the goto, ignore fall-through. If needed, push
+ * the fall-through branch for simulation under speculative
+ * execution.
+ */
+ if (!env->bypass_spec_v1 &&
+ !sanitize_speculative_path(env, insn, *insn_idx + 1,
+ *insn_idx))
+ return -EFAULT;
*insn_idx += insn->off;
return 0;
} else if (pred == 0) {
- /* only follow fall-through branch, since
- * that's where the program will go
+ /* Only follow the fall-through branch, since that's where the
+ * program will go. If needed, push the goto branch for
+ * simulation under speculative execution.
*/
+ if (!env->bypass_spec_v1 &&
+ !sanitize_speculative_path(env, insn,
+ *insn_idx + insn->off + 1,
+ *insn_idx))
+ return -EFAULT;
return 0;
}
@@ -10630,7 +10679,7 @@ static int do_check(struct bpf_verifier_env *env)
}
regs = cur_regs(env);
- env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+ sanitize_mark_insn_seen(env);
prev_insn_idx = env->insn_idx;
if (class == BPF_ALU || class == BPF_ALU64) {
@@ -10857,7 +10906,7 @@ process_bpf_exit:
return err;
env->insn_idx++;
- env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+ sanitize_mark_insn_seen(env);
} else {
verbose(env, "invalid BPF_LD mode\n");
return -EINVAL;
@@ -11366,6 +11415,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
{
struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
+ u32 old_seen = old_data[off].seen;
u32 prog_len;
int i;
@@ -11386,7 +11436,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
for (i = off; i < off + cnt - 1; i++) {
- new_data[i].seen = env->pass_cnt;
+ /* Expand insni[off]'s seen count to the patched range. */
+ new_data[i].seen = old_seen;
new_data[i].zext_dst = insn_has_def32(env, insn + i);
}
env->insn_aux_data = new_data;
@@ -12710,6 +12761,9 @@ static void free_states(struct bpf_verifier_env *env)
* insn_aux_data was touched. These variables are compared to clear temporary
* data from failed pass. For testing and experiments do_check_common() can be
* run multiple times even when prior attempt to verify is unsuccessful.
+ *
+ * Note that special handling is needed on !env->bypass_spec_v1 if this is
+ * ever called outside of error path with subsequent program rejection.
*/
static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
{
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8190b6bfc978..1f274d7fc934 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -820,6 +820,10 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
struct cgroup *cgrp = kn->priv;
int ret;
+ /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
+ if (strchr(new_name_str, '\n'))
+ return -EINVAL;
+
if (kernfs_type(kn) != KERNFS_DIR)
return -ENOTDIR;
if (kn->parent != new_parent)
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 825284baaf46..684a6061a13a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
+ VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
VMCOREINFO_STRUCT_SIZE(page);
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index a0b3b04fb596..bf16395b9e13 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -5,6 +5,7 @@
#include <linux/highmem.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
+#include <linux/tick.h>
#include "common.h"
@@ -186,7 +187,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
local_irq_disable_exit_to_user();
/* Check if any of the above work has queued a deferred wakeup */
- rcu_nocb_flush_deferred_wakeup();
+ tick_nohz_user_enter_prepare();
ti_work = READ_ONCE(current_thread_info()->flags);
}
@@ -202,7 +203,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
lockdep_assert_irqs_disabled();
/* Flush pending rcuog wakeup before the last need_resched() check */
- rcu_nocb_flush_deferred_wakeup();
+ tick_nohz_user_enter_prepare();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6fee4a7e88d7..fe88d6eea3c2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4609,7 +4609,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
++ctx->pin_count;
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
return ctx;
}
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 23a7a0ba1388..db8c248ebc8c 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -70,9 +70,6 @@ bool irq_work_queue(struct irq_work *work)
if (!irq_work_claim(work))
return false;
- /*record irq_work call stack in order to print it in KASAN reports*/
- kasan_record_aux_stack(work);
-
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
__irq_work_queue_local(work);
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 7a1414622051..94232186fccb 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -391,6 +391,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
/* No obstacles. */
return vprintk_default(fmt, args);
}
+EXPORT_SYMBOL(vprintk);
void __init printk_safe_init(void)
{
@@ -411,4 +412,3 @@ void __init printk_safe_init(void)
/* Flush pending messages that did not have scheduled IRQ works. */
printk_safe_flush();
}
-EXPORT_SYMBOL(vprintk);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095..4ca80df205ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6389,7 +6389,6 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
{
return __sched_setscheduler(p, attr, false, true);
}
-EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
/**
* sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9c882f20803e..c5aacbd492a1 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -885,6 +885,7 @@ static const struct seq_operations sched_debug_sops = {
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F) __PS(#F, F)
#define P(F) __PS(#F, p->F)
+#define PM(F, M) __PS(#F, p->F & (M))
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define PN(F) __PSN(#F, p->F)
@@ -1011,7 +1012,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(se.avg.util_avg);
P(se.avg.last_update_time);
P(se.avg.util_est.ewma);
- P(se.avg.util_est.enqueued);
+ PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
#endif
#ifdef CONFIG_UCLAMP_TASK
__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3248e24a90b0..2c8a9352590d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3499,10 +3499,9 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
static inline void
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+ long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
unsigned long load_avg;
u64 load_sum = 0;
- s64 delta_sum;
u32 divider;
if (!runnable_sum)
@@ -3549,13 +3548,13 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
load_sum = (s64)se_weight(se) * runnable_sum;
load_avg = div_s64(load_sum, divider);
- delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
- delta_avg = load_avg - se->avg.load_avg;
+ delta = load_avg - se->avg.load_avg;
se->avg.load_sum = runnable_sum;
se->avg.load_avg = load_avg;
- add_positive(&cfs_rq->avg.load_avg, delta_avg);
- add_positive(&cfs_rq->avg.load_sum, delta_sum);
+
+ add_positive(&cfs_rq->avg.load_avg, delta);
+ cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
}
static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3766,11 +3765,17 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
+ /*
+ * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+ * See ___update_load_avg() for details.
+ */
+ u32 divider = get_pelt_divider(&cfs_rq->avg);
+
dequeue_load_avg(cfs_rq, se);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
- sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
- sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+ cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
@@ -3902,7 +3907,7 @@ static inline unsigned long _task_util_est(struct task_struct *p)
{
struct util_est ue = READ_ONCE(p->se.avg.util_est);
- return (max(ue.ewma, ue.enqueued) | UTIL_AVG_UNCHANGED);
+ return max(ue.ewma, (ue.enqueued & ~UTIL_AVG_UNCHANGED));
}
static inline unsigned long task_util_est(struct task_struct *p)
@@ -4002,7 +4007,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
* Reset EWMA on utilization increases, the moving average is used only
* to smooth utilization decreases.
*/
- ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
+ ue.enqueued = task_util(p);
if (sched_feat(UTIL_EST_FASTUP)) {
if (ue.ewma < ue.enqueued) {
ue.ewma = ue.enqueued;
@@ -4051,6 +4056,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
ue.ewma += last_ewma_diff;
ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
done:
+ ue.enqueued |= UTIL_AVG_UNCHANGED;
WRITE_ONCE(p->se.avg.util_est, ue);
trace_sched_util_est_se_tp(&p->se);
@@ -8030,7 +8036,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
/* Propagate pending load changes to the parent, if any: */
se = cfs_rq->tg->se[cpu];
if (se && !skip_blocked_update(se))
- update_load_avg(cfs_rq_of(se), se, 0);
+ update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
/*
* There can be a lot of idle CPU cgroups. Don't let fully
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 1462846d244e..cfe94ffd2b38 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -42,15 +42,6 @@ static inline u32 get_pelt_divider(struct sched_avg *avg)
return LOAD_AVG_MAX - 1024 + avg->period_contrib;
}
-/*
- * When a task is dequeued, its estimated utilization should not be update if
- * its util_avg has not been updated at least once.
- * This flag is used to synchronize util_avg updates with util_est updates.
- * We map this information into the LSB bit of the utilization saved at
- * dequeue time (i.e. util_est.dequeued).
- */
-#define UTIL_AVG_UNCHANGED 0x1
-
static inline void cfs_se_util_change(struct sched_avg *avg)
{
unsigned int enqueued;
@@ -58,7 +49,7 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
if (!sched_feat(UTIL_EST))
return;
- /* Avoid store if the flag has been already set */
+ /* Avoid store if the flag has been already reset */
enqueued = avg->util_est.enqueued;
if (!(enqueued & UTIL_AVG_UNCHANGED))
return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 828b091501ca..6784f27a3099 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -230,6 +230,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
+EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
bool tick_nohz_full_running;
EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2e8a3fde7104..72ef4dccbcc4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1967,12 +1967,18 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
static void print_ip_ins(const char *fmt, const unsigned char *p)
{
+ char ins[MCOUNT_INSN_SIZE];
int i;
+ if (copy_from_kernel_nofault(ins, p, MCOUNT_INSN_SIZE)) {
+ printk(KERN_CONT "%s[FAULT] %px\n", fmt, p);
+ return;
+ }
+
printk(KERN_CONT "%s", fmt);
for (i = 0; i < MCOUNT_INSN_SIZE; i++)
- printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+ printk(KERN_CONT "%s%02x", i ? ":" : "", ins[i]);
}
enum ftrace_bug_type ftrace_bug_type;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a21ef9cd2aae..d23a09d3eb37 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2198,9 +2198,6 @@ struct saved_cmdlines_buffer {
};
static struct saved_cmdlines_buffer *savedcmd;
-/* temporary disable recording */
-static atomic_t trace_record_taskinfo_disabled __read_mostly;
-
static inline char *get_saved_cmdlines(int idx)
{
return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -2486,8 +2483,6 @@ static bool tracing_record_taskinfo_skip(int flags)
{
if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
return true;
- if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
- return true;
if (!__this_cpu_read(trace_taskinfo_save))
return true;
return false;
@@ -2736,7 +2731,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
val = this_cpu_inc_return(trace_buffered_event_cnt);
- if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
+ if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
trace_event_setup(entry, type, trace_ctx);
entry->array[0] = len;
return entry;
@@ -3998,9 +3993,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-EBUSY);
#endif
- if (!iter->snapshot)
- atomic_inc(&trace_record_taskinfo_disabled);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
@@ -4043,9 +4035,6 @@ static void s_stop(struct seq_file *m, void *p)
return;
#endif
- if (!iter->snapshot)
- atomic_dec(&trace_record_taskinfo_disabled);
-
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index c1637f90c8a3..4702efb00ff2 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -115,9 +115,9 @@ u64 notrace trace_clock_global(void)
prev_time = READ_ONCE(trace_clock_struct.prev_time);
now = sched_clock_cpu(this_cpu);
- /* Make sure that now is always greater than prev_time */
+ /* Make sure that now is always greater than or equal to prev_time */
if ((s64)(now - prev_time) < 0)
- now = prev_time + 1;
+ now = prev_time;
/*
* If in an NMI context then dont risk lockups and simply return
@@ -131,7 +131,7 @@ u64 notrace trace_clock_global(void)
/* Reread prev_time in case it was already updated */
prev_time = READ_ONCE(trace_clock_struct.prev_time);
if ((s64)(now - prev_time) < 0)
- now = prev_time + 1;
+ now = prev_time;
trace_clock_struct.prev_time = now;