summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile6
-rw-r--r--kernel/bpf/btf.c14
-rw-r--r--kernel/bpf/cgroup.c1
-rw-r--r--kernel/bpf/core.c54
-rw-r--r--kernel/bpf/map_in_map.c17
-rw-r--r--kernel/bpf/stackmap.c12
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/verifier.c371
-rw-r--r--kernel/compat.c18
-rw-r--r--kernel/dma/coherent.c2
-rw-r--r--kernel/dma/debug.c19
-rw-r--r--kernel/dma/mapping.c94
-rw-r--r--kernel/dma/remap.c13
-rw-r--r--kernel/dma/swiotlb.c2
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/fork.c19
-rw-r--r--kernel/futex.c35
-rw-r--r--kernel/hung_task.c20
-rw-r--r--kernel/jump_label.c10
-rw-r--r--kernel/kcov.c2
-rw-r--r--kernel/locking/mutex.c2
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/panic.c28
-rw-r--r--kernel/printk/printk.c4
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/rseq.c6
-rw-r--r--kernel/sched/core.c8
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/fair.c6
-rw-r--r--kernel/sched/sched.h6
-rw-r--r--kernel/sched/swait.c2
-rw-r--r--kernel/sched/wait.c2
-rw-r--r--kernel/seccomp.c4
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys.c5
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/trace_kprobe.c12
-rw-r--r--kernel/umh.c33
41 files changed, 568 insertions, 300 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index cde93d54c571..6aa7543bcdb2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -122,7 +122,11 @@ targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
- filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;")
+filechk_ikconfiggz = \
+ echo "static const char kernel_config_data[] __used = MAGIC_START"; \
+ cat $< | scripts/bin2c; \
+ echo "MAGIC_END;"
+
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call filechk,ikconfiggz)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..befe570be5ba 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
return kind_ops[BTF_INFO_KIND(t->info)];
}
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
return BTF_STR_OFFSET_VALID(offset) &&
offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
u8 nr_copy_bits;
u64 print_num;
- data += BITS_ROUNDDOWN_BYTES(bits_offset);
- bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
nr_copy_bits = nr_bits + bits_offset;
nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
* BTF_INT_OFFSET() cannot exceed 64 bits.
*/
total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
- btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m);
+ data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+ bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+ btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
}
static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -2001,12 +2001,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
member_offset = btf_member_bit_offset(t, member);
bitfield_size = btf_member_bitfield_size(t, member);
+ bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+ bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
if (bitfield_size) {
- btf_bitfield_seq_show(data, member_offset,
+ btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
bitfield_size, m);
} else {
- bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
- bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
ops = btf_type_ops(member_type);
ops->seq_show(btf, member_type, member->type,
data + bytes_offset, bits8_offset, m);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..ab612fe9862f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
+ /* fall through */
default:
return NULL;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 38de580abcc2..f908b9356025 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -54,6 +54,7 @@
#define DST regs[insn->dst_reg]
#define SRC regs[insn->src_reg]
#define FP regs[BPF_REG_FP]
+#define AX regs[BPF_REG_AX]
#define ARG1 regs[BPF_REG_ARG1]
#define CTX regs[BPF_REG_CTX]
#define IMM insn->imm
@@ -857,6 +858,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
+ /* Constraints on AX register:
+ *
+ * AX register is inaccessible from user space. It is mapped in
+ * all JITs, and used here for constant blinding rewrites. It is
+ * typically "stateless" meaning its contents are only valid within
+ * the executed instruction, but not across several instructions.
+ * There are a few exceptions however which are further detailed
+ * below.
+ *
+ * Constant blinding is only used by JITs, not in the interpreter.
+ * The interpreter uses AX in some occasions as a local temporary
+ * register e.g. in DIV or MOD instructions.
+ *
+ * In restricted circumstances, the verifier can also use the AX
+ * register for rewrites as long as they do not interfere with
+ * the above cases!
+ */
+ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
+ goto out;
+
if (from->imm == 0 &&
(from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
@@ -1188,7 +1209,6 @@ bool bpf_opcode_in_insntable(u8 code)
*/
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
- u64 tmp;
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
@@ -1268,36 +1288,36 @@ select_insn:
(*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- div64_u64_rem(DST, SRC, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, SRC, &AX);
+ DST = AX;
CONT;
ALU_MOD_X:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) SRC);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) SRC);
CONT;
ALU64_MOD_K:
- div64_u64_rem(DST, IMM, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, IMM, &AX);
+ DST = AX;
CONT;
ALU_MOD_K:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) IMM);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) IMM);
CONT;
ALU64_DIV_X:
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- tmp = (u32) DST;
- do_div(tmp, (u32) SRC);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) SRC);
+ DST = (u32) AX;
CONT;
ALU64_DIV_K:
DST = div64_u64(DST, IMM);
CONT;
ALU_DIV_K:
- tmp = (u32) DST;
- do_div(tmp, (u32) IMM);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) IMM);
+ DST = (u32) AX;
CONT;
ALU_END_TO_BE:
switch (IMM) {
@@ -1553,7 +1573,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
@@ -1566,7 +1586,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
BPF_R1 = r1; \
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
{
struct bpf_map *inner_map, *inner_map_meta;
+ u32 inner_map_meta_size;
struct fd f;
f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
return ERR_PTR(-EINVAL);
}
- inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+ inner_map_meta_size = sizeof(*inner_map_meta);
+ /* In some cases verifier needs to access beyond just base map. */
+ if (inner_map->ops == &array_map_ops)
+ inner_map_meta_size = sizeof(struct bpf_array);
+
+ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
if (!inner_map_meta) {
fdput(f);
return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->key_size = inner_map->key_size;
inner_map_meta->value_size = inner_map->value_size;
inner_map_meta->map_flags = inner_map->map_flags;
- inner_map_meta->ops = inner_map->ops;
inner_map_meta->max_entries = inner_map->max_entries;
+ /* Misc members not needed in bpf_map_meta_equal() check. */
+ inner_map_meta->ops = inner_map->ops;
+ if (inner_map->ops == &array_map_ops) {
+ inner_map_meta->unpriv_array = inner_map->unpriv_array;
+ container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+ container_of(inner_map, struct bpf_array, map)->index_mask;
+ }
+
fdput(f);
return inner_map_meta;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..d43b14535827 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
if (nhdr->n_type == BPF_BUILD_ID &&
nhdr->n_namesz == sizeof("GNU") &&
- nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+ nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
memcpy(build_id,
note_start + note_offs +
ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
- BPF_BUILD_ID_SIZE);
+ nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0,
+ BPF_BUILD_ID_SIZE - nhdr->n_descsz);
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
return -EFAULT; /* page not mapped */
ret = -EINVAL;
- page_addr = page_address(page);
+ page_addr = kmap_atomic(page);
ehdr = (Elf32_Ehdr *)page_addr;
/* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
ret = stack_map_get_build_id_64(page_addr, build_id);
out:
+ kunmap_atomic(page_addr);
put_page(page);
return ret;
}
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
+ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
}
return;
}
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
+ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
continue;
}
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0607db304def..b155cd17c1bd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -79,7 +79,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
return -E2BIG;
- if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
+ if (unlikely(!access_ok(uaddr, actual_size)))
return -EFAULT;
if (actual_size <= expected_size)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 71d86e3024ae..56674a7c3778 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -710,6 +710,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
free_func_state(dst_state->frame[i]);
dst_state->frame[i] = NULL;
}
+ dst_state->speculative = src->speculative;
dst_state->curframe = src->curframe;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
@@ -754,7 +755,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
}
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
- int insn_idx, int prev_insn_idx)
+ int insn_idx, int prev_insn_idx,
+ bool speculative)
{
struct bpf_verifier_state *cur = env->cur_state;
struct bpf_verifier_stack_elem *elem;
@@ -772,6 +774,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
err = copy_verifier_state(&elem->st, cur);
if (err)
goto err;
+ elem->st.speculative |= speculative;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
verbose(env, "BPF program is too complex\n");
goto err;
@@ -1387,6 +1390,31 @@ static int check_stack_read(struct bpf_verifier_env *env,
}
}
+static int check_stack_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ /* Stack accesses must be at a fixed offset, so that we
+ * can determine what type of data were returned. See
+ * check_stack_read().
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "variable stack access var_off=%s off=%d size=%d",
+ tn_buf, off, size);
+ return -EACCES;
+ }
+
+ if (off >= 0 || off < -MAX_BPF_STACK) {
+ verbose(env, "invalid stack off=%d size=%d\n", off, size);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, bool zero_size_allowed)
@@ -1418,13 +1446,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
*/
if (env->log.level)
print_verifier_state(env, state);
+
/* The minimum value is only important with signed
* comparisons where we can't assume the floor of a
* value is 0. If we are using signed variables for our
* index'es we need to make sure that whatever we use
* will have a set floor within our range.
*/
- if (reg->smin_value < 0) {
+ if (reg->smin_value < 0 &&
+ (reg->smin_value == S64_MIN ||
+ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
+ reg->smin_value + off < 0)) {
verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
regno);
return -EACCES;
@@ -1954,24 +1986,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (reg->type == PTR_TO_STACK) {
- /* stack accesses must be at a fixed offset, so that we can
- * determine what type of data were returned.
- * See check_stack_read().
- */
- if (!tnum_is_const(reg->var_off)) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable stack access var_off=%s off=%d size=%d",
- tn_buf, off, size);
- return -EACCES;
- }
off += reg->var_off.value;
- if (off >= 0 || off < -MAX_BPF_STACK) {
- verbose(env, "invalid stack off=%d size=%d\n", off,
- size);
- return -EACCES;
- }
+ err = check_stack_access(env, reg, off, size);
+ if (err)
+ return err;
state = func(env, reg);
err = update_stack_depth(env, state, off);
@@ -3052,6 +3070,125 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
return true;
}
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+ return &env->insn_aux_data[env->insn_idx];
+}
+
+static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
+ u32 *ptr_limit, u8 opcode, bool off_is_neg)
+{
+ bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
+ (opcode == BPF_SUB && !off_is_neg);
+ u32 off;
+
+ switch (ptr_reg->type) {
+ case PTR_TO_STACK:
+ off = ptr_reg->off + ptr_reg->var_off.value;
+ if (mask_to_left)
+ *ptr_limit = MAX_BPF_STACK + off;
+ else
+ *ptr_limit = -off;
+ return 0;
+ case PTR_TO_MAP_VALUE:
+ if (mask_to_left) {
+ *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
+ } else {
+ off = ptr_reg->smin_value + ptr_reg->off;
+ *ptr_limit = ptr_reg->map_ptr->value_size - off;
+ }
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+ const struct bpf_insn *insn)
+{
+ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+ u32 alu_state, u32 alu_limit)
+{
+ /* If we arrived here from different branches with different
+ * state or limits to sanitize, then this won't work.
+ */
+ if (aux->alu_state &&
+ (aux->alu_state != alu_state ||
+ aux->alu_limit != alu_limit))
+ return -EACCES;
+
+ /* Corresponding fixup done in fixup_bpf_calls(). */
+ aux->alu_state = alu_state;
+ aux->alu_limit = alu_limit;
+ return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ struct bpf_insn_aux_data *aux = cur_aux(env);
+
+ if (can_skip_alu_sanitation(env, insn))
+ return 0;
+
+ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
+static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ const struct bpf_reg_state *ptr_reg,
+ struct bpf_reg_state *dst_reg,
+ bool off_is_neg)
+{
+ struct bpf_verifier_state *vstate = env->cur_state;
+ struct bpf_insn_aux_data *aux = cur_aux(env);
+ bool ptr_is_dst_reg = ptr_reg == dst_reg;
+ u8 opcode = BPF_OP(insn->code);
+ u32 alu_state, alu_limit;
+ struct bpf_reg_state tmp;
+ bool ret;
+
+ if (can_skip_alu_sanitation(env, insn))
+ return 0;
+
+ /* We already marked aux for masking from non-speculative
+ * paths, thus we got here in the first place. We only care
+ * to explore bad access from here.
+ */
+ if (vstate->speculative)
+ goto do_sim;
+
+ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= ptr_is_dst_reg ?
+ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+ if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
+ return 0;
+ if (update_alu_sanitation_state(aux, alu_state, alu_limit))
+ return -EACCES;
+do_sim:
+ /* Simulate and find potential out-of-bounds access under
+ * speculative execution from truncation as a result of
+ * masking when off was not within expected range. If off
+ * sits in dst, then we temporarily need to move ptr there
+ * to simulate dst (== 0) +/-= ptr. Needed, for example,
+ * for cases where we use K-based arithmetic in one direction
+ * and truncated reg-based in the other in order to explore
+ * bad access.
+ */
+ if (!ptr_is_dst_reg) {
+ tmp = *dst_reg;
+ *dst_reg = *ptr_reg;
+ }
+ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+ if (!ptr_is_dst_reg)
+ *dst_reg = tmp;
+ return !ret ? -EFAULT : 0;
+}
+
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
* Caller should also handle BPF_MOV case separately.
* If we return -EACCES, caller may want to try again treating pointer as a
@@ -3070,8 +3207,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
+ u32 dst = insn->dst_reg, src = insn->src_reg;
u8 opcode = BPF_OP(insn->code);
- u32 dst = insn->dst_reg;
+ int ret;
dst_reg = &regs[dst];
@@ -3104,6 +3242,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
+ case PTR_TO_MAP_VALUE:
+ if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
+ verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
+ off_reg == dst_reg ? dst : src);
+ return -EACCES;
+ }
+ /* fall-through */
default:
break;
}
@@ -3120,6 +3265,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
switch (opcode) {
case BPF_ADD:
+ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
+ if (ret < 0) {
+ verbose(env, "R%d tried to add from different maps or paths\n", dst);
+ return ret;
+ }
/* We can take a fixed offset as long as it doesn't overflow
* the s32 'off' field
*/
@@ -3170,6 +3320,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
break;
case BPF_SUB:
+ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
+ if (ret < 0) {
+ verbose(env, "R%d tried to sub from different maps or paths\n", dst);
+ return ret;
+ }
if (dst_reg == off_reg) {
/* scalar -= pointer. Creates an unknown scalar */
verbose(env, "R%d tried to subtract pointer from scalar\n",
@@ -3249,6 +3404,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
__update_reg_bounds(dst_reg);
__reg_deduce_bounds(dst_reg);
__reg_bound_offset(dst_reg);
+
+ /* For unprivileged we require that resulting offset must be in bounds
+ * in order to be able to sanitize access later on.
+ */
+ if (!env->allow_ptr_leaks) {
+ if (dst_reg->type == PTR_TO_MAP_VALUE &&
+ check_map_access(env, dst, dst_reg->off, 1, false)) {
+ verbose(env, "R%d pointer arithmetic of map value goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ } else if (dst_reg->type == PTR_TO_STACK &&
+ check_stack_access(env, dst_reg, dst_reg->off +
+ dst_reg->var_off.value, 1)) {
+ verbose(env, "R%d stack pointer arithmetic goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ }
+ }
+
return 0;
}
@@ -3267,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
s64 smin_val, smax_val;
u64 umin_val, umax_val;
u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+ u32 dst = insn->dst_reg;
+ int ret;
if (insn_bitness == 32) {
/* Relevant for 32-bit RSH: Information can propagate towards
@@ -3301,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
switch (opcode) {
case BPF_ADD:
+ ret = sanitize_val_alu(env, insn);
+ if (ret < 0) {
+ verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+ return ret;
+ }
if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
signed_add_overflows(dst_reg->smax_value, smax_val)) {
dst_reg->smin_value = S64_MIN;
@@ -3320,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
break;
case BPF_SUB:
+ ret = sanitize_val_alu(env, insn);
+ if (ret < 0) {
+ verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+ return ret;
+ }
if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
signed_sub_overflows(dst_reg->smax_value, smin_val)) {
/* Overflow possible, we know nothing */
@@ -4348,7 +4534,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
}
- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
+ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
+ false);
if (!other_branch)
return -EFAULT;
other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
@@ -5458,6 +5645,12 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->curframe != cur->curframe)
return false;
+ /* Verification state from speculative execution simulation
+ * must never prune a non-speculative execution one.
+ */
+ if (old->speculative && !cur->speculative)
+ return false;
+
/* for states to be equal callsites have to be the same
* and all frame states need to be equivalent
*/
@@ -5650,7 +5843,6 @@ static int do_check(struct bpf_verifier_env *env)
struct bpf_insn *insns = env->prog->insnsi;
struct bpf_reg_state *regs;
int insn_cnt = env->prog->len, i;
- int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
bool do_print_state = false;
@@ -5660,6 +5852,7 @@ static int do_check(struct bpf_verifier_env *env)
if (!state)
return -ENOMEM;
state->curframe = 0;
+ state->speculative = false;
state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
if (!state->frame[0]) {
kfree(state);
@@ -5670,19 +5863,19 @@ static int do_check(struct bpf_verifier_env *env)
BPF_MAIN_FUNC /* callsite */,
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
- insn_idx = 0;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
int err;
- if (insn_idx >= insn_cnt) {
+ if (env->insn_idx >= insn_cnt) {
verbose(env, "invalid insn idx %d insn_cnt %d\n",
- insn_idx, insn_cnt);
+ env->insn_idx, insn_cnt);
return -EFAULT;
}
- insn = &insns[insn_idx];
+ insn = &insns[env->insn_idx];
class = BPF_CLASS(insn->code);
if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
@@ -5692,17 +5885,19 @@ static int do_check(struct bpf_verifier_env *env)
return -E2BIG;
}
- err = is_state_visited(env, insn_idx);
+ err = is_state_visited(env, env->insn_idx);
if (err < 0)
return err;
if (err == 1) {
/* found equivalent state, can prune the search */
if (env->log.level) {
if (do_print_state)
- verbose(env, "\nfrom %d to %d: safe\n",
- prev_insn_idx, insn_idx);
+ verbose(env, "\nfrom %d to %d%s: safe\n",
+ env->prev_insn_idx, env->insn_idx,
+ env->cur_state->speculative ?
+ " (speculative execution)" : "");
else
- verbose(env, "%d: safe\n", insn_idx);
+ verbose(env, "%d: safe\n", env->insn_idx);
}
goto process_bpf_exit;
}
@@ -5715,10 +5910,12 @@ static int do_check(struct bpf_verifier_env *env)
if (env->log.level > 1 || (env->log.level && do_print_state)) {
if (env->log.level > 1)
- verbose(env, "%d:", insn_idx);
+ verbose(env, "%d:", env->insn_idx);
else
- verbose(env, "\nfrom %d to %d:",
- prev_insn_idx, insn_idx);
+ verbose(env, "\nfrom %d to %d%s:",
+ env->prev_insn_idx, env->insn_idx,
+ env->cur_state->speculative ?
+ " (speculative execution)" : "");
print_verifier_state(env, state->frame[state->curframe]);
do_print_state = false;
}
@@ -5729,20 +5926,20 @@ static int do_check(struct bpf_verifier_env *env)
.private_data = env,
};
- verbose_linfo(env, insn_idx, "; ");
- verbose(env, "%d: ", insn_idx);
+ verbose_linfo(env, env->insn_idx, "; ");
+ verbose(env, "%d: ", env->insn_idx);
print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
if (bpf_prog_is_dev_bound(env->prog->aux)) {
- err = bpf_prog_offload_verify_insn(env, insn_idx,
- prev_insn_idx);
+ err = bpf_prog_offload_verify_insn(env, env->insn_idx,
+ env->prev_insn_idx);
if (err)
return err;
}
regs = cur_regs(env);
- env->insn_aux_data[insn_idx].seen = true;
+ env->insn_aux_data[env->insn_idx].seen = true;
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
@@ -5768,13 +5965,13 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (src_reg + off) is readable,
* the state of dst_reg will be updated by this func
*/
- err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ,
- insn->dst_reg, false);
+ err = check_mem_access(env, env->insn_idx, insn->src_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_READ, insn->dst_reg, false);
if (err)
return err;
- prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
@@ -5799,10 +5996,10 @@ static int do_check(struct bpf_verifier_env *env)
enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
- err = check_xadd(env, insn_idx, insn);
+ err = check_xadd(env, env->insn_idx, insn);
if (err)
return err;
- insn_idx++;
+ env->insn_idx++;
continue;
}
@@ -5818,13 +6015,13 @@ static int do_check(struct bpf_verifier_env *env)
dst_reg_type = regs[insn->dst_reg].type;
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE,
- insn->src_reg, false);
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_WRITE, insn->src_reg, false);
if (err)
return err;
- prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
if (*prev_dst_type == NOT_INIT) {
*prev_dst_type = dst_reg_type;
@@ -5852,9 +6049,9 @@ static int do_check(struct bpf_verifier_env *env)
}
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE,
- -1, false);
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_WRITE, -1, false);
if (err)
return err;
@@ -5872,9 +6069,9 @@ static int do_check(struct bpf_verifier_env *env)
}
if (insn->src_reg == BPF_PSEUDO_CALL)
- err = check_func_call(env, insn, &insn_idx);
+ err = check_func_call(env, insn, &env->insn_idx);
else
- err = check_helper_call(env, insn->imm, insn_idx);
+ err = check_helper_call(env, insn->imm, env->insn_idx);
if (err)
return err;
@@ -5887,7 +6084,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- insn_idx += insn->off + 1;
+ env->insn_idx += insn->off + 1;
continue;
} else if (opcode == BPF_EXIT) {
@@ -5901,8 +6098,8 @@ static int do_check(struct bpf_verifier_env *env)
if (state->curframe) {
/* exit from nested function */
- prev_insn_idx = insn_idx;
- err = prepare_func_exit(env, &insn_idx);
+ env->prev_insn_idx = env->insn_idx;
+ err = prepare_func_exit(env, &env->insn_idx);
if (err)
return err;
do_print_state = true;
@@ -5932,7 +6129,8 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
process_bpf_exit:
- err = pop_stack(env, &prev_insn_idx, &insn_idx);
+ err = pop_stack(env, &env->prev_insn_idx,
+ &env->insn_idx);
if (err < 0) {
if (err != -ENOENT)
return err;
@@ -5942,7 +6140,7 @@ process_bpf_exit:
continue;
}
} else {
- err = check_cond_jmp_op(env, insn, &insn_idx);
+ err = check_cond_jmp_op(env, insn, &env->insn_idx);
if (err)
return err;
}
@@ -5959,8 +6157,8 @@ process_bpf_exit:
if (err)
return err;
- insn_idx++;
- env->insn_aux_data[insn_idx].seen = true;
+ env->insn_idx++;
+ env->insn_aux_data[env->insn_idx].seen = true;
} else {
verbose(env, "invalid BPF_LD mode\n");
return -EINVAL;
@@ -5970,7 +6168,7 @@ process_bpf_exit:
return -EINVAL;
}
- insn_idx++;
+ env->insn_idx++;
}
verbose(env, "processed %d insns (limit %d), stack depth ",
@@ -6709,6 +6907,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
+ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
+ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
+ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
+ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
+ struct bpf_insn insn_buf[16];
+ struct bpf_insn *patch = &insn_buf[0];
+ bool issrc, isneg;
+ u32 off_reg;
+
+ aux = &env->insn_aux_data[i + delta];
+ if (!aux->alu_state)
+ continue;
+
+ isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
+ issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
+ BPF_ALU_SANITIZE_SRC;
+
+ off_reg = issrc ? insn->src_reg : insn->dst_reg;
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ if (issrc) {
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
+ off_reg);
+ insn->src_reg = BPF_REG_AX;
+ } else {
+ *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
+ BPF_REG_AX);
+ }
+ if (isneg)
+ insn->code = insn->code == code_add ?
+ code_sub : code_add;
+ *patch++ = *insn;
+ if (issrc && isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ cnt = patch - insn_buf;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
if (insn->src_reg == BPF_PSEUDO_CALL)
diff --git a/kernel/compat.c b/kernel/compat.c
index 089d00d0da9c..f01affa17e22 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -95,28 +95,28 @@ int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc)
static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
{
- return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
+ return (!access_ok(ctv, sizeof(*ctv)) ||
__get_user(tv->tv_sec, &ctv->tv_sec) ||
__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
}
static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv)
{
- return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
+ return (!access_ok(ctv, sizeof(*ctv)) ||
__put_user(tv->tv_sec, &ctv->tv_sec) ||
__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
}
static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts)
{
- return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
+ return (!access_ok(cts, sizeof(*cts)) ||
__get_user(ts->tv_sec, &cts->tv_sec) ||
__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts)
{
- return (!access_ok(VERIFY_WRITE, cts, sizeof(*cts)) ||
+ return (!access_ok(cts, sizeof(*cts)) ||
__put_user(ts->tv_sec, &cts->tv_sec) ||
__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
@@ -335,7 +335,7 @@ int get_compat_sigevent(struct sigevent *event,
const struct compat_sigevent __user *u_event)
{
memset(event, 0, sizeof(*event));
- return (!access_ok(VERIFY_READ, u_event, sizeof(*u_event)) ||
+ return (!access_ok(u_event, sizeof(*u_event)) ||
__get_user(event->sigev_value.sival_int,
&u_event->sigev_value.sival_int) ||
__get_user(event->sigev_signo, &u_event->sigev_signo) ||
@@ -354,10 +354,9 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!access_ok(VERIFY_READ, umask, bitmap_size / 8))
+ if (!user_access_begin(umask, bitmap_size / 8))
return -EFAULT;
- user_access_begin();
while (nr_compat_longs > 1) {
compat_ulong_t l1, l2;
unsafe_get_user(l1, umask++, Efault);
@@ -384,10 +383,9 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!access_ok(VERIFY_WRITE, umask, bitmap_size / 8))
+ if (!user_access_begin(umask, bitmap_size / 8))
return -EFAULT;
- user_access_begin();
while (nr_compat_longs > 1) {
unsigned long m = *mask++;
unsafe_put_user((compat_ulong_t)m, umask++, Efault);
@@ -438,7 +436,7 @@ void __user *compat_alloc_user_space(unsigned long len)
ptr = arch_compat_alloc_user_space(len);
- if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
+ if (unlikely(!access_ok(ptr, len)))
return NULL;
return ptr;
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 597d40893862..66f0fb7e9a3a 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -223,7 +223,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
*/
return mem->flags & DMA_MEMORY_EXCLUSIVE;
}
-EXPORT_SYMBOL(dma_alloc_from_dev_coherent);
void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
{
@@ -268,7 +267,6 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
return __dma_release_from_coherent(mem, order, vaddr);
}
-EXPORT_SYMBOL(dma_release_from_dev_coherent);
int dma_release_from_global_coherent(int order, void *vaddr)
{
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 164706da2a73..23cf5361bcf1 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -49,7 +49,6 @@
enum {
dma_debug_single,
- dma_debug_page,
dma_debug_sg,
dma_debug_coherent,
dma_debug_resource,
@@ -1300,8 +1299,7 @@ void debug_dma_map_single(struct device *dev, const void *addr,
EXPORT_SYMBOL(debug_dma_map_single);
void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
- size_t size, int direction, dma_addr_t dma_addr,
- bool map_single)
+ size_t size, int direction, dma_addr_t dma_addr)
{
struct dma_debug_entry *entry;
@@ -1316,7 +1314,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
return;
entry->dev = dev;
- entry->type = dma_debug_page;
+ entry->type = dma_debug_single;
entry->pfn = page_to_pfn(page);
entry->offset = offset,
entry->dev_addr = dma_addr;
@@ -1324,9 +1322,6 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->direction = direction;
entry->map_err_type = MAP_ERR_NOT_CHECKED;
- if (map_single)
- entry->type = dma_debug_single;
-
check_for_stack(dev, page, offset);
if (!PageHighMem(page)) {
@@ -1378,10 +1373,10 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
EXPORT_SYMBOL(debug_dma_mapping_error);
void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
- size_t size, int direction, bool map_single)
+ size_t size, int direction)
{
struct dma_debug_entry ref = {
- .type = dma_debug_page,
+ .type = dma_debug_single,
.dev = dev,
.dev_addr = addr,
.size = size,
@@ -1390,10 +1385,6 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
if (unlikely(dma_debug_disabled()))
return;
-
- if (map_single)
- ref.type = dma_debug_single;
-
check_unmap(&ref);
}
EXPORT_SYMBOL(debug_dma_unmap_page);
@@ -1521,7 +1512,6 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
add_dma_entry(entry);
}
-EXPORT_SYMBOL(debug_dma_alloc_coherent);
void debug_dma_free_coherent(struct device *dev, size_t size,
void *virt, dma_addr_t addr)
@@ -1549,7 +1539,6 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
check_unmap(&ref);
}
-EXPORT_SYMBOL(debug_dma_free_coherent);
void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
int direction, dma_addr_t dma_addr)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index d7c34d2d1ba5..a11006b6d8e8 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -46,45 +46,6 @@ static int dmam_match(struct device *dev, void *res, void *match_data)
}
/**
- * dmam_alloc_coherent - Managed dma_alloc_coherent()
- * @dev: Device to allocate coherent memory for
- * @size: Size of allocation
- * @dma_handle: Out argument for allocated DMA handle
- * @gfp: Allocation flags
- *
- * Managed dma_alloc_coherent(). Memory allocated using this function
- * will be automatically released on driver detach.
- *
- * RETURNS:
- * Pointer to allocated memory on success, NULL on failure.
- */
-void *dmam_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp)
-{
- struct dma_devres *dr;
- void *vaddr;
-
- dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
- if (!dr)
- return NULL;
-
- vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp);
- if (!vaddr) {
- devres_free(dr);
- return NULL;
- }
-
- dr->vaddr = vaddr;
- dr->dma_handle = *dma_handle;
- dr->size = size;
-
- devres_add(dev, dr);
-
- return vaddr;
-}
-EXPORT_SYMBOL(dmam_alloc_coherent);
-
-/**
* dmam_free_coherent - Managed dma_free_coherent()
* @dev: Device to free coherent memory for
* @size: Size of allocation
@@ -144,61 +105,6 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
EXPORT_SYMBOL(dmam_alloc_attrs);
-#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
-
-static void dmam_coherent_decl_release(struct device *dev, void *res)
-{
- dma_release_declared_memory(dev);
-}
-
-/**
- * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
- * @dev: Device to declare coherent memory for
- * @phys_addr: Physical address of coherent memory to be declared
- * @device_addr: Device address of coherent memory to be declared
- * @size: Size of coherent memory to be declared
- * @flags: Flags
- *
- * Managed dma_declare_coherent_memory().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void *res;
- int rc;
-
- res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL);
- if (!res)
- return -ENOMEM;
-
- rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
- flags);
- if (!rc)
- devres_add(dev, res);
- else
- devres_free(res);
-
- return rc;
-}
-EXPORT_SYMBOL(dmam_declare_coherent_memory);
-
-/**
- * dmam_release_declared_memory - Managed dma_release_declared_memory().
- * @dev: Device to release declared coherent memory for
- *
- * Managed dmam_release_declared_memory().
- */
-void dmam_release_declared_memory(struct device *dev)
-{
- WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL));
-}
-EXPORT_SYMBOL(dmam_release_declared_memory);
-
-#endif
-
/*
* Create scatter-list for the already allocated DMA buffer.
*/
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 18cc09fc27b9..7a723194ecbe 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -204,8 +204,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
ret = dma_alloc_from_pool(size, &page, flags);
if (!ret)
return NULL;
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
- return ret;
+ goto done;
}
page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
@@ -215,8 +214,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
- if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
- return page; /* opaque cookie */
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+ ret = page; /* opaque cookie */
+ goto done;
+ }
/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
@@ -227,9 +228,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
return ret;
}
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
memset(ret, 0, size);
-
+done:
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
return ret;
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d6361776dc5c..1fb6fd68b9c7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -378,6 +378,8 @@ void __init swiotlb_exit(void)
memblock_free_late(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
+ io_tlb_start = 0;
+ io_tlb_end = 0;
io_tlb_nslabs = 0;
max_segment = 0;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67ecac337374..3cd13a30f732 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10135,7 +10135,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
u32 size;
int ret;
- if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+ if (!access_ok(uattr, PERF_ATTR_SIZE_VER0))
return -EFAULT;
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 0e21e6d21f35..284f2fe9a293 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -866,6 +866,7 @@ void __noreturn do_exit(long code)
exit_task_namespaces(tsk);
exit_task_work(tsk);
exit_thread(tsk);
+ exit_umh(tsk);
/*
* Flush inherited counters to the parent - before the parent
@@ -1604,10 +1605,9 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
- if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
+ if (!user_access_begin(infop, sizeof(*infop)))
return -EFAULT;
- user_access_begin();
unsafe_put_user(signo, &infop->si_signo, Efault);
unsafe_put_user(0, &infop->si_errno, Efault);
unsafe_put_user(info.cause, &infop->si_code, Efault);
@@ -1732,10 +1732,9 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
- if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
+ if (!user_access_begin(infop, sizeof(*infop)))
return -EFAULT;
- user_access_begin();
unsafe_put_user(signo, &infop->si_signo, Efault);
unsafe_put_user(0, &infop->si_errno, Efault);
unsafe_put_user(info.cause, &infop->si_code, Efault);
diff --git a/kernel/fork.c b/kernel/fork.c
index d439c48ecf18..b69248e6f0e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -164,10 +164,6 @@ static inline void free_task_struct(struct task_struct *tsk)
}
#endif
-void __weak arch_release_thread_stack(unsigned long *stack)
-{
-}
-
#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
/*
@@ -221,6 +217,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
memset(s->addr, 0, THREAD_SIZE);
tsk->stack_vm_area = s;
+ tsk->stack = s->addr;
return s->addr;
}
@@ -422,7 +419,6 @@ static void release_task_stack(struct task_struct *tsk)
return; /* Better to leak the stack than to free prematurely */
account_kernel_stack(tsk, -1);
- arch_release_thread_stack(tsk->stack);
free_thread_stack(tsk);
tsk->stack = NULL;
#ifdef CONFIG_VMAP_STACK
@@ -1838,8 +1834,6 @@ static __latent_entropy struct task_struct *copy_process(
posix_cpu_timers_init(p);
- p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
audit_set_context(p, NULL);
cgroup_fork(p);
@@ -2006,6 +2000,17 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_free_pid;
/*
+ * From this point on we must avoid any synchronous user-space
+ * communication until we take the tasklist-lock. In particular, we do
+ * not want user-space to be able to predict the process start-time by
+ * stalling fork(2) after we recorded the start_time but before it is
+ * visible to the system.
+ */
+
+ p->start_time = ktime_get_ns();
+ p->real_start_time = ktime_get_boot_ns();
+
+ /*
* Make it visible to the rest of the system, but dont wake it up yet.
* Need tasklist lock for parent etc handling!
*/
diff --git a/kernel/futex.c b/kernel/futex.c
index 054105854e0e..be3bff2315ff 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -481,13 +481,18 @@ static void drop_futex_key_refs(union futex_key *key)
}
}
+enum futex_access {
+ FUTEX_READ,
+ FUTEX_WRITE
+};
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
- * @rw: mapping needs to be read/write (values: VERIFY_READ,
- * VERIFY_WRITE)
+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
+ * FUTEX_WRITE)
*
* Return: a negative error code or 0
*
@@ -500,7 +505,7 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -516,7 +521,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
return -EINVAL;
address -= key->both.offset;
- if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
return -EFAULT;
if (unlikely(should_fail_futex(fshared)))
@@ -546,7 +551,7 @@ again:
* If write access is not required (eg. FUTEX_WAIT), try
* and get read-only access.
*/
- if (err == -EFAULT && rw == VERIFY_READ) {
+ if (err == -EFAULT && rw == FUTEX_READ) {
err = get_user_pages_fast(address, 1, 0, &page);
ro = 1;
}
@@ -1583,7 +1588,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
@@ -1642,7 +1647,7 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
oparg = 1 << oparg;
}
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
@@ -1682,10 +1687,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
DEFINE_WAKE_Q(wake_q);
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1961,11 +1966,11 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
}
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
if (unlikely(ret != 0))
goto out;
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
- requeue_pi ? VERIFY_WRITE : VERIFY_READ);
+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -2634,7 +2639,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
* while the syscall executes.
*/
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
if (unlikely(ret != 0))
return ret;
@@ -2793,7 +2798,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
}
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out;
@@ -2972,7 +2977,7 @@ retry:
if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
if (ret)
return ret;
@@ -3199,7 +3204,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
rt_mutex_init_waiter(&rt_waiter);
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
goto out;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index cb8e3e8ac7b9..4a9191617076 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -34,7 +34,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bound.
*/
-#define HUNG_TASK_BATCHING 1024
+#define HUNG_TASK_LOCK_BREAK (HZ / 10)
/*
* Zero means infinite timeout - no checking done:
@@ -112,8 +112,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
trace_sched_process_hang(t);
- if (!sysctl_hung_task_warnings && !sysctl_hung_task_panic)
- return;
+ if (sysctl_hung_task_panic) {
+ console_verbose();
+ hung_task_show_lock = true;
+ hung_task_call_panic = true;
+ }
/*
* Ok, the task did not get scheduled for more than 2 minutes,
@@ -135,11 +138,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
}
touch_nmi_watchdog();
-
- if (sysctl_hung_task_panic) {
- hung_task_show_lock = true;
- hung_task_call_panic = true;
- }
}
/*
@@ -173,7 +171,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
- int batch_count = HUNG_TASK_BATCHING;
+ unsigned long last_break = jiffies;
struct task_struct *g, *t;
/*
@@ -188,10 +186,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
for_each_process_thread(g, t) {
if (!max_count--)
goto unlock;
- if (!--batch_count) {
- batch_count = HUNG_TASK_BATCHING;
+ if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
if (!rcu_lock_break(g, t))
goto unlock;
+ last_break = jiffies;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b28028b08d44..bad96b476eb6 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -18,8 +18,6 @@
#include <linux/cpu.h>
#include <asm/sections.h>
-#ifdef HAVE_JUMP_LABEL
-
/* mutex to protect coming/going of the the jump_label table */
static DEFINE_MUTEX(jump_label_mutex);
@@ -80,13 +78,13 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
static void jump_label_update(struct static_key *key);
/*
- * There are similar definitions for the !HAVE_JUMP_LABEL case in jump_label.h.
+ * There are similar definitions for the !CONFIG_JUMP_LABEL case in jump_label.h.
* The use of 'atomic_read()' requires atomic.h and its problematic for some
* kernel headers such as kernel.h and others. Since static_key_count() is not
- * used in the branch statements as it is for the !HAVE_JUMP_LABEL case its ok
+ * used in the branch statements as it is for the !CONFIG_JUMP_LABEL case its ok
* to have it be a function here. Similarly, for 'static_key_enable()' and
* 'static_key_disable()', which require bug.h. This should allow jump_label.h
- * to be included from most/all places for HAVE_JUMP_LABEL.
+ * to be included from most/all places for CONFIG_JUMP_LABEL.
*/
int static_key_count(struct static_key *key)
{
@@ -791,5 +789,3 @@ static __init int jump_label_test(void)
}
early_initcall(jump_label_test);
#endif /* STATIC_KEYS_SELFTEST */
-
-#endif /* HAVE_JUMP_LABEL */
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 97959d7b77e2..c2277dbdbfb1 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -112,7 +112,7 @@ void notrace __sanitizer_cov_trace_pc(void)
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
-static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
+static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
{
struct task_struct *t;
u64 *area;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 3f8a35104285..db578783dd36 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -987,7 +987,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* wait_lock. This ensures the lock cancellation is ordered
* against mutex_unlock() and wake-ups do not go missing.
*/
- if (unlikely(signal_pending_state(state, current))) {
+ if (signal_pending_state(state, current)) {
ret = -EINTR;
goto err;
}
diff --git a/kernel/module.c b/kernel/module.c
index fcbc0128810b..2ad1b5239910 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3102,7 +3102,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->bpf_raw_events),
&mod->num_bpf_raw_events);
#endif
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
mod->jump_entries = section_objs(info, "__jump_table",
sizeof(*mod->jump_entries),
&mod->num_jump_entries);
diff --git a/kernel/panic.c b/kernel/panic.c
index d10c340c43b0..f121e6ba7e11 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -46,6 +46,13 @@ int panic_on_warn __read_mostly;
int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
+#define PANIC_PRINT_TASK_INFO 0x00000001
+#define PANIC_PRINT_MEM_INFO 0x00000002
+#define PANIC_PRINT_TIMER_INFO 0x00000004
+#define PANIC_PRINT_LOCK_INFO 0x00000008
+#define PANIC_PRINT_FTRACE_INFO 0x00000010
+unsigned long panic_print;
+
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
EXPORT_SYMBOL(panic_notifier_list);
@@ -125,6 +132,24 @@ void nmi_panic(struct pt_regs *regs, const char *msg)
}
EXPORT_SYMBOL(nmi_panic);
+static void panic_print_sys_info(void)
+{
+ if (panic_print & PANIC_PRINT_TASK_INFO)
+ show_state();
+
+ if (panic_print & PANIC_PRINT_MEM_INFO)
+ show_mem(0, NULL);
+
+ if (panic_print & PANIC_PRINT_TIMER_INFO)
+ sysrq_timer_list_show();
+
+ if (panic_print & PANIC_PRINT_LOCK_INFO)
+ debug_show_all_locks();
+
+ if (panic_print & PANIC_PRINT_FTRACE_INFO)
+ ftrace_dump(DUMP_ALL);
+}
+
/**
* panic - halt the system
* @fmt: The text string to print
@@ -254,6 +279,8 @@ void panic(const char *fmt, ...)
debug_locks_off();
console_flush_on_panic();
+ panic_print_sys_info();
+
if (!panic_blink)
panic_blink = no_blink;
@@ -658,6 +685,7 @@ void refcount_error_report(struct pt_regs *regs, const char *err)
#endif
core_param(panic, panic_timeout, int, 0644);
+core_param(panic_print, panic_print, ulong, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1306fe0c1dc6..d3d170374ceb 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1466,7 +1466,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
return -EINVAL;
if (!len)
return 0;
- if (!access_ok(VERIFY_WRITE, buf, len))
+ if (!access_ok(buf, len))
return -EFAULT;
error = wait_event_interruptible(log_wait,
syslog_seq != log_next_seq);
@@ -1484,7 +1484,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
return -EINVAL;
if (!len)
return 0;
- if (!access_ok(VERIFY_WRITE, buf, len))
+ if (!access_ok(buf, len))
return -EFAULT;
error = syslog_print_all(buf, len, clear);
break;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index c2cee9db5204..771e93f9c43f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1073,7 +1073,7 @@ int ptrace_request(struct task_struct *child, long request,
struct iovec kiov;
struct iovec __user *uiov = datavp;
- if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+ if (!access_ok(uiov, sizeof(*uiov)))
return -EFAULT;
if (__get_user(kiov.iov_base, &uiov->iov_base) ||
@@ -1229,7 +1229,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
compat_uptr_t ptr;
compat_size_t len;
- if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+ if (!access_ok(uiov, sizeof(*uiov)))
return -EFAULT;
if (__get_user(ptr, &uiov->iov_base) ||
diff --git a/kernel/rseq.c b/kernel/rseq.c
index c6242d8594dc..25e9a7b60eba 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -267,7 +267,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(t->flags & PF_EXITING))
return;
- if (unlikely(!access_ok(VERIFY_WRITE, t->rseq, sizeof(*t->rseq))))
+ if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
goto error;
ret = rseq_ip_fixup(regs);
if (unlikely(ret < 0))
@@ -295,7 +295,7 @@ void rseq_syscall(struct pt_regs *regs)
if (!t->rseq)
return;
- if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
+ if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV, t);
}
@@ -351,7 +351,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
rseq_len != sizeof(*rseq))
return -EINVAL;
- if (!access_ok(VERIFY_WRITE, rseq, rseq_len))
+ if (!access_ok(rseq, rseq_len))
return -EFAULT;
current->rseq = rseq;
current->rseq_len = rseq_len;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f66920173370..a674c7db2f29 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -24,7 +24,7 @@
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
/*
* Debugging: various feature bits
*
@@ -3416,7 +3416,7 @@ static void __sched notrace __schedule(bool preempt)
switch_count = &prev->nivcsw;
if (!preempt && prev->state) {
- if (unlikely(signal_pending_state(prev->state, prev))) {
+ if (signal_pending_state(prev->state, prev)) {
prev->state = TASK_RUNNING;
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
@@ -4450,7 +4450,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
u32 size;
int ret;
- if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+ if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
return -EFAULT;
/* Zero the full structure, so that a short copy will be nice: */
@@ -4650,7 +4650,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
{
int ret;
- if (!access_ok(VERIFY_WRITE, uattr, usize))
+ if (!access_ok(uattr, usize))
return -EFAULT;
/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 02bd5f969b21..de3de997e245 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -73,7 +73,7 @@ static int sched_feat_show(struct seq_file *m, void *v)
return 0;
}
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
#define jump_label_key__true STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE
@@ -99,7 +99,7 @@ static void sched_feat_enable(int i)
#else
static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };
-#endif /* HAVE_JUMP_LABEL */
+#endif /* CONFIG_JUMP_LABEL */
static int sched_feat_set(char *cmp)
{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6483834f1278..50aa2aba69bd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4217,7 +4217,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
#ifdef CONFIG_CFS_BANDWIDTH
-#ifdef HAVE_JUMP_LABEL
+#ifdef CONFIG_JUMP_LABEL
static struct static_key __cfs_bandwidth_used;
static inline bool cfs_bandwidth_used(void)
@@ -4234,7 +4234,7 @@ void cfs_bandwidth_usage_dec(void)
{
static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
}
-#else /* HAVE_JUMP_LABEL */
+#else /* CONFIG_JUMP_LABEL */
static bool cfs_bandwidth_used(void)
{
return true;
@@ -4242,7 +4242,7 @@ static bool cfs_bandwidth_used(void)
void cfs_bandwidth_usage_inc(void) {}
void cfs_bandwidth_usage_dec(void) {}
-#endif /* HAVE_JUMP_LABEL */
+#endif /* CONFIG_JUMP_LABEL */
/*
* default period for cfs group bandwidth.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0ba08924e017..d04530bf251f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1488,7 +1488,7 @@ enum {
#undef SCHED_FEAT
-#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
/*
* To support run-time toggling of sched features, all the translation units
@@ -1508,7 +1508,7 @@ static __always_inline bool static_branch_##name(struct static_key *key) \
extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
-#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
+#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */
/*
* Each translation unit has its own copy of sysctl_sched_features to allow
@@ -1524,7 +1524,7 @@ static const_debug __maybe_unused unsigned int sysctl_sched_features =
#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
-#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
+#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */
extern struct static_key_false sched_numa_balancing;
extern struct static_key_false sched_schedstats;
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 66b59ac77c22..e83a3f8449f6 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -93,7 +93,7 @@ long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait
long ret = 0;
raw_spin_lock_irqsave(&q->lock, flags);
- if (unlikely(signal_pending_state(state, current))) {
+ if (signal_pending_state(state, current)) {
/*
* See prepare_to_wait_event(). TL;DR, subsequent swake_up_one()
* must not see us.
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 5dd47f1103d1..6eb1f8efd221 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -264,7 +264,7 @@ long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_en
long ret = 0;
spin_lock_irqsave(&wq_head->lock, flags);
- if (unlikely(signal_pending_state(state, current))) {
+ if (signal_pending_state(state, current)) {
/*
* Exclusive waiter must not fail if it was selected by wakeup,
* it should "consume" the condition we were waiting for.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d7f538847b84..e815781ed751 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
struct seccomp_filter *filter = file->private_data;
struct seccomp_knotif *knotif;
+ if (!filter)
+ return 0;
+
mutex_lock(&filter->notify_lock);
/*
@@ -1300,6 +1303,7 @@ out:
out_put_fd:
if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
if (ret < 0) {
+ listener_f->private_data = NULL;
fput(listener_f);
put_unused_fd(listener);
} else {
diff --git a/kernel/signal.c b/kernel/signal.c
index 53e07d97ffe0..e1d7ad8e6ab1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3997,7 +3997,7 @@ SYSCALL_DEFINE3(sigaction, int, sig,
if (act) {
old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ if (!access_ok(act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
@@ -4012,7 +4012,7 @@ SYSCALL_DEFINE3(sigaction, int, sig,
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ if (!access_ok(oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
@@ -4034,7 +4034,7 @@ COMPAT_SYSCALL_DEFINE3(sigaction, int, sig,
compat_uptr_t handler, restorer;
if (act) {
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ if (!access_ok(act, sizeof(*act)) ||
__get_user(handler, &act->sa_handler) ||
__get_user(restorer, &act->sa_restorer) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
@@ -4052,7 +4052,7 @@ COMPAT_SYSCALL_DEFINE3(sigaction, int, sig,
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ if (!access_ok(oact, sizeof(*oact)) ||
__put_user(ptr_to_compat(old_ka.sa.sa_handler),
&oact->sa_handler) ||
__put_user(ptr_to_compat(old_ka.sa.sa_restorer),
diff --git a/kernel/sys.c b/kernel/sys.c
index 64b5a230f38d..f7eb62eceb24 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1207,7 +1207,8 @@ DECLARE_RWSEM(uts_sem);
/*
* Work around broken programs that cannot handle "Linux 3.0".
* Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
- * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
+ * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be
+ * 2.6.60.
*/
static int override_release(char __user *release, size_t len)
{
@@ -2627,7 +2628,7 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
s.freehigh >>= bitcount;
}
- if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
+ if (!access_ok(info, sizeof(struct compat_sysinfo)) ||
__put_user(s.uptime, &info->uptime) ||
__put_user(s.loads[0], &info->loads[0]) ||
__put_user(s.loads[1], &info->loads[1]) ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1825f712e73b..ba4d9e85feb8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -807,6 +807,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "panic_print",
+ .data = &panic_print,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
#if defined CONFIG_PRINTK
{
.procname = "printk",
@@ -2787,6 +2794,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
bool neg;
left -= proc_skip_spaces(&p);
+ if (!left)
+ break;
err = proc_get_long(&p, &left, &val, &neg,
proc_wspace_sep,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 07148b497451..73c132095a7b 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -140,6 +140,7 @@ static const struct bin_table bin_kern_table[] = {
{ CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
{ CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
{ CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" },
+ { CTL_ULONG, KERN_PANIC_PRINT, "panic_print" },
{}
};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9ddb6fddb4e0..8b068adb9da1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -170,7 +170,7 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
return -EPERM;
if (unlikely(uaccess_kernel()))
return -EPERM;
- if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
+ if (!access_ok(unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5c19b8c41c7e..d5fb09ebba8b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -607,11 +607,17 @@ static int trace_kprobe_create(int argc, const char *argv[])
char buf[MAX_EVENT_NAME_LEN];
unsigned int flags = TPARG_FL_KERNEL;
- /* argc must be >= 1 */
- if (argv[0][0] == 'r') {
+ switch (argv[0][0]) {
+ case 'r':
is_return = true;
flags |= TPARG_FL_RETURN;
- } else if (argv[0][0] != 'p' || argc < 2)
+ break;
+ case 'p':
+ break;
+ default:
+ return -ECANCELED;
+ }
+ if (argc < 2)
return -ECANCELED;
event = strchr(&argv[0][1], ':');
diff --git a/kernel/umh.c b/kernel/umh.c
index 0baa672e023c..d937cbad903a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
static DEFINE_SPINLOCK(umh_sysctl_lock);
static DECLARE_RWSEM(umhelper_sem);
+static LIST_HEAD(umh_list);
+static DEFINE_MUTEX(umh_list_lock);
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
@@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data)
commit_creds(new);
sub_info->pid = task_pid_nr(current);
- if (sub_info->file)
+ if (sub_info->file) {
retval = do_execve_file(sub_info->file,
sub_info->argv, sub_info->envp);
- else
+ if (!retval)
+ current->flags |= PF_UMH;
+ } else
retval = do_execve(getname_kernel(sub_info->path),
(const char __user *const __user *)sub_info->argv,
(const char __user *const __user *)sub_info->envp);
@@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
goto out;
err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+ if (!err) {
+ mutex_lock(&umh_list_lock);
+ list_add(&info->list, &umh_list);
+ mutex_unlock(&umh_list_lock);
+ }
out:
fput(file);
return err;
@@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write,
return 0;
}
+void __exit_umh(struct task_struct *tsk)
+{
+ struct umh_info *info;
+ pid_t pid = tsk->pid;
+
+ mutex_lock(&umh_list_lock);
+ list_for_each_entry(info, &umh_list, list) {
+ if (info->pid == pid) {
+ list_del(&info->list);
+ mutex_unlock(&umh_list_lock);
+ goto out;
+ }
+ }
+ mutex_unlock(&umh_list_lock);
+ return;
+out:
+ if (info->cleanup)
+ info->cleanup(info);
+}
+
struct ctl_table usermodehelper_table[] = {
{
.procname = "bset",