Diffstat (limited to 'kernel')
-rw-r--r--  kernel/dma/remap.c                    |  4
-rw-r--r--  kernel/events/core.c                  | 90
-rw-r--r--  kernel/fork.c                         | 49
-rw-r--r--  kernel/freezer.c                      |  6
-rwxr-xr-x  kernel/gen_kheaders.sh                |  5
-rw-r--r--  kernel/panic.c                        |  1
-rw-r--r--  kernel/sched/core.c                   | 43
-rw-r--r--  kernel/sched/cputime.c                |  6
-rw-r--r--  kernel/sched/fair.c                   | 36
-rw-r--r--  kernel/sched/membarrier.c             |  1
-rw-r--r--  kernel/time/tick-broadcast-hrtimer.c  | 62
11 files changed, 133 insertions(+), 170 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index ca4e5d44b571..c00b9258fa6a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -87,9 +87,9 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
*/
void dma_common_free_remap(void *cpu_addr, size_t size)
{
- struct page **pages = dma_common_find_pages(cpu_addr);
+ struct vm_struct *area = find_vm_area(cpu_addr);
- if (!pages) {
+ if (!area || area->flags != VM_DMA_COHERENT) {
WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
return;
}
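
The change above frees a coherent remap only after confirming that find_vm_area() returned an area tagged VM_DMA_COHERENT. A minimal userspace sketch of the same validate-before-free pattern, with all names invented for illustration (an analogy, not the kernel API):

```c
/* Illustrative userspace analogue of the "validate before free" check above.
 * All names are made up for the example; only the shape of the check mirrors
 * dma_common_free_remap().
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define AREA_DMA_COHERENT 0x4242	/* stand-in for VM_DMA_COHERENT */

struct tagged_area {
	unsigned int flags;
	unsigned char data[];
};

static void *area_alloc(size_t size)
{
	struct tagged_area *a = malloc(sizeof(*a) + size);

	if (!a)
		return NULL;
	a->flags = AREA_DMA_COHERENT;
	return a->data;
}

static void area_free(void *addr)
{
	struct tagged_area *a;

	if (!addr)
		return;
	a = (struct tagged_area *)((char *)addr - offsetof(struct tagged_area, data));
	/* Same idea as "!area || area->flags != VM_DMA_COHERENT": refuse to
	 * release something this allocator did not hand out.
	 */
	if (a->flags != AREA_DMA_COHERENT) {
		fprintf(stderr, "trying to free invalid coherent area: %p\n", addr);
		return;
	}
	free(a);
}

int main(void)
{
	void *p = area_alloc(64);

	area_free(p);
	return 0;
}
```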
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4655adbbae10..9ec0b0bfddbd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3779,11 +3779,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
perf_event_groups_insert(&ctx->flexible_groups, event);
}
+/* pick an event from the flexible_groups to rotate */
static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
{
- return list_first_entry_or_null(&ctx->flexible_active,
- struct perf_event, active_list);
+ struct perf_event *event;
+
+ /* pick the first active flexible event */
+ event = list_first_entry_or_null(&ctx->flexible_active,
+ struct perf_event, active_list);
+
+ /* if no active flexible event, pick the first event */
+ if (!event) {
+ event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+ typeof(*event), group_node);
+ }
+
+ return event;
}
static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
@@ -3808,9 +3820,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
perf_pmu_disable(cpuctx->ctx.pmu);
if (task_rotate)
- task_event = ctx_first_active(task_ctx);
+ task_event = ctx_event_to_rotate(task_ctx);
if (cpu_rotate)
- cpu_event = ctx_first_active(&cpuctx->ctx);
+ cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
/*
* As per the order given at ctx_resched() first 'pop' task flexible
@@ -5668,7 +5680,8 @@ again:
* undo the VM accounting.
*/
- atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+ atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
+ &mmap_user->locked_vm);
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
free_uid(mmap_user);
@@ -5812,8 +5825,20 @@ accounting:
user_locked = atomic_long_read(&user->locked_vm) + user_extra;
- if (user_locked > user_lock_limit)
+ if (user_locked <= user_lock_limit) {
+ /* charge all to locked_vm */
+ } else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
+ /* charge all to pinned_vm */
+ extra = user_extra;
+ user_extra = 0;
+ } else {
+ /*
+ * charge locked_vm until it hits user_lock_limit;
+ * charge the rest from pinned_vm
+ */
extra = user_locked - user_lock_limit;
+ user_extra -= extra;
+ }
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
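
The accounting hunk above splits a new perf mmap request between the per-user locked_vm budget and pinned_vm, and the earlier unmap hunk now releases only the portion that was actually charged to locked_vm. A small sketch of just that arithmetic, reduced to a plain userspace function with invented names so it can be compiled and tested:

```c
/* Sketch of the charge-splitting logic in the hunk above, reduced to plain
 * arithmetic; not the kernel code.
 */
#include <assert.h>

struct charge {
	unsigned long to_locked;	/* pages charged against locked_vm */
	unsigned long to_pinned;	/* pages charged against pinned_vm */
};

static struct charge split_charge(unsigned long locked_vm,
				  unsigned long lock_limit,
				  unsigned long request)
{
	struct charge c = { .to_locked = request, .to_pinned = 0 };
	unsigned long user_locked = locked_vm + request;

	if (user_locked <= lock_limit) {
		/* everything fits: charge all to locked_vm */
	} else if (locked_vm >= lock_limit) {
		/* already at/over the limit: charge all to pinned_vm */
		c.to_pinned = request;
		c.to_locked = 0;
	} else {
		/* fill locked_vm up to the limit, rest goes to pinned_vm */
		c.to_pinned = user_locked - lock_limit;
		c.to_locked = request - c.to_pinned;
	}
	return c;
}

int main(void)
{
	struct charge c = split_charge(90, 100, 30);

	assert(c.to_locked == 10 && c.to_pinned == 20);
	c = split_charge(100, 100, 5);
	assert(c.to_locked == 0 && c.to_pinned == 5);
	c = split_charge(10, 100, 5);
	assert(c.to_locked == 5 && c.to_pinned == 0);
	return 0;
}
```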
@@ -10586,55 +10611,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
u32 size;
int ret;
- if (!access_ok(uattr, PERF_ATTR_SIZE_VER0))
- return -EFAULT;
-
- /*
- * zero the full structure, so that a short copy will be nice.
- */
+ /* Zero the full structure, so that a short copy will be nice. */
memset(attr, 0, sizeof(*attr));
ret = get_user(size, &uattr->size);
if (ret)
return ret;
- if (size > PAGE_SIZE) /* silly large */
- goto err_size;
-
- if (!size) /* abi compat */
+ /* ABI compatibility quirk: */
+ if (!size)
size = PERF_ATTR_SIZE_VER0;
-
- if (size < PERF_ATTR_SIZE_VER0)
+ if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE)
goto err_size;
- /*
- * If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0 - i.e. new
- * user-space does not rely on any kernel feature
- * extensions we dont know about yet.
- */
- if (size > sizeof(*attr)) {
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
-
- addr = (void __user *)uattr + sizeof(*attr);
- end = (void __user *)uattr + size;
-
- for (; addr < end; addr++) {
- ret = get_user(val, addr);
- if (ret)
- return ret;
- if (val)
- goto err_size;
- }
- size = sizeof(*attr);
+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+ if (ret) {
+ if (ret == -E2BIG)
+ goto err_size;
+ return ret;
}
- ret = copy_from_user(attr, uattr, size);
- if (ret)
- return -EFAULT;
-
attr->size = size;
	if (attr->__reserved_1)
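
This hunk, together with the sched_copy_attr() and clone3 hunks further down, replaces the open-coded "check that trailing bytes are zero, then copy" loop with copy_struct_from_user(). A hedged userspace model of that helper's semantics (memcpy() stands in for the real user copy; the error code mirrors the kernel's, everything else is illustrative):

```c
/* Userspace model of copy_struct_from_user() semantics: zero the kernel
 * struct, reject a larger user struct unless its trailing bytes are all
 * zero, and copy only min(ksize, usize) bytes.  memcpy() stands in for
 * copy_from_user(), which could additionally fail with -EFAULT.
 */
#include <string.h>
#include <stdio.h>

#define E2BIG 7

static int copy_struct_model(void *dst, size_t ksize,
			     const void *src, size_t usize)
{
	size_t size = ksize < usize ? ksize : usize;

	memset(dst, 0, ksize);

	if (usize > ksize) {
		/* newer userspace: every byte we do not understand must be 0 */
		const unsigned char *p = (const unsigned char *)src + ksize;
		const unsigned char *end = (const unsigned char *)src + usize;

		for (; p < end; p++)
			if (*p)
				return -E2BIG;
	}

	memcpy(dst, src, size);
	return 0;
}

int main(void)
{
	struct { int a, b; } kattr;
	unsigned char uattr[12] = { 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 };

	/* larger-but-zero tail: accepted */
	printf("%d\n", copy_struct_model(&kattr, sizeof(kattr), uattr, sizeof(uattr)));
	/* nonzero tail: rejected with -E2BIG, which perf_copy_attr() maps to err_size */
	uattr[8] = 0xff;
	printf("%d\n", copy_struct_model(&kattr, sizeof(kattr), uattr, sizeof(uattr)));
	return 0;
}
```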
@@ -11891,6 +11887,10 @@ static int inherit_group(struct perf_event *parent_event,
child, leader, child_ctx);
if (IS_ERR(child_ctr))
return PTR_ERR(child_ctr);
+
+ if (sub->aux_event == parent_event &&
+ !perf_get_aux_event(child_ctr, leader))
+ return -EINVAL;
}
return 0;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index f9572f416126..bcdf53125210 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2525,39 +2525,19 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
#ifdef __ARCH_WANT_SYS_CLONE3
noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
struct clone_args __user *uargs,
- size_t size)
+ size_t usize)
{
+ int err;
struct clone_args args;
- if (unlikely(size > PAGE_SIZE))
+ if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
-
- if (unlikely(size < sizeof(struct clone_args)))
+ if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
return -EINVAL;
- if (unlikely(!access_ok(uargs, size)))
- return -EFAULT;
-
- if (size > sizeof(struct clone_args)) {
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
-
- addr = (void __user *)uargs + sizeof(struct clone_args);
- end = (void __user *)uargs + size;
-
- for (; addr < end; addr++) {
- if (get_user(val, addr))
- return -EFAULT;
- if (val)
- return -E2BIG;
- }
-
- size = sizeof(struct clone_args);
- }
-
- if (copy_from_user(&args, uargs, size))
- return -EFAULT;
+ err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
+ if (err)
+ return err;
/*
* Verify that higher 32bits of exit_signal are unset and that
@@ -2604,6 +2584,17 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs)
return true;
}
+/**
+ * clone3 - create a new process with specific properties
+ * @uargs: argument structure
+ * @size: size of @uargs
+ *
+ * clone3() is the extensible successor to clone()/clone2().
+ * It takes a struct as argument that is versioned by its size.
+ *
+ * Return: On success, a positive PID for the child process.
+ * On error, a negative errno number.
+ */
SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
{
int err;
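
The kernel-doc added above describes clone3() as versioned by the size of its argument struct, which is what the copy_struct_from_user() conversion implements. A minimal usage sketch from userspace, assuming Linux 5.3+ uapi headers for __NR_clone3 and struct clone_args:

```c
/* Minimal clone3() usage sketch: pass a struct clone_args together with its
 * size, so older and newer kernels/userspace can interoperate.  Assumes
 * Linux 5.3+ headers; not the kernel implementation itself.
 */
#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args, CLONE_* */
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct clone_args args;
	long pid;

	memset(&args, 0, sizeof(args));
	args.exit_signal = SIGCHLD;	/* behave like a plain fork() */

	/* The size argument is the ABI version: the kernel accepts any size
	 * >= CLONE_ARGS_SIZE_VER0 as long as unknown trailing bytes are 0. */
	pid = syscall(__NR_clone3, &args, sizeof(args));
	if (pid < 0) {
		perror("clone3");
		return 1;
	}
	if (pid == 0) {
		printf("child %d\n", getpid());
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}
```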
@@ -2934,7 +2925,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
struct ctl_table t;
int ret;
int threads = max_threads;
- int min = MIN_THREADS;
+ int min = 1;
int max = MAX_THREADS;
t = *table;
@@ -2946,7 +2937,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
if (ret || !write)
return ret;
- set_max_threads(threads);
+ max_threads = threads;
return 0;
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c0738424bb43..dc520f01f99d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -22,12 +22,6 @@ EXPORT_SYMBOL(system_freezing_cnt);
bool pm_freezing;
bool pm_nosig_freezing;
-/*
- * Temporary export for the deadlock workaround in ata_scsi_hotplug().
- * Remove once the hack becomes unnecessary.
- */
-EXPORT_SYMBOL_GPL(pm_freezing);
-
/* protects freezing and frozen transitions */
static DEFINE_SPINLOCK(freezer_lock);
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 9ff449888d9c..aff79e461fc9 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -71,7 +71,10 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1
find $cpio_dir -type f -print0 |
xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
-tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null
+# Create archive and try to normalize metadata for reproducibility
+tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+ --owner=0 --group=0 --sort=name --numeric-owner \
+ -Jcf $tarfile -C $cpio_dir/ . > /dev/null
echo "$src_files_md5" > kernel/kheaders.md5
echo "$obj_files_md5" >> kernel/kheaders.md5
diff --git a/kernel/panic.c b/kernel/panic.c
index 47e8ebccc22b..f470a038b05b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -180,6 +180,7 @@ void panic(const char *fmt, ...)
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
+ preempt_disable_notrace();
/*
* It's possible to come here directly from a panic-assertion and
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7880f4f64d0e..dd05a378631a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5106,9 +5106,6 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
u32 size;
int ret;
- if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
- return -EFAULT;
-
/* Zero the full structure, so that a short copy will be nice: */
memset(attr, 0, sizeof(*attr));
@@ -5116,45 +5113,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
if (ret)
return ret;
- /* Bail out on silly large: */
- if (size > PAGE_SIZE)
- goto err_size;
-
/* ABI compatibility quirk: */
if (!size)
size = SCHED_ATTR_SIZE_VER0;
-
- if (size < SCHED_ATTR_SIZE_VER0)
+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
goto err_size;
- /*
- * If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0 - i.e. new
- * user-space does not rely on any kernel feature
- * extensions we dont know about yet.
- */
- if (size > sizeof(*attr)) {
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
-
- addr = (void __user *)uattr + sizeof(*attr);
- end = (void __user *)uattr + size;
-
- for (; addr < end; addr++) {
- ret = get_user(val, addr);
- if (ret)
- return ret;
- if (val)
- goto err_size;
- }
- size = sizeof(*attr);
+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+ if (ret) {
+ if (ret == -E2BIG)
+ goto err_size;
+ return ret;
}
- ret = copy_from_user(attr, uattr, size);
- if (ret)
- return -EFAULT;
-
if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
size < SCHED_ATTR_SIZE_VER1)
return -EINVAL;
@@ -5354,7 +5325,7 @@ sched_attr_copy_to_user(struct sched_attr __user *uattr,
* sys_sched_getattr - similar to sched_getparam, but with sched_attr
* @pid: the pid in question.
* @uattr: structure containing the extended parameters.
- * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility.
+ * @usize: sizeof(attr) for fwd/bwd comp.
* @flags: for future extension.
*/
SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
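
For completeness, the size-versioned sched_attr ABI that sched_copy_attr() now handles via copy_struct_from_user() looks like this from the consumer side. A hedged sketch: the struct is declared locally because glibc ships no sched_attr definition, and its layout is assumed to match SCHED_ATTR_SIZE_VER0 (48 bytes) from the uapi header:

```c
/* Sketch of the size-versioned sched_attr ABI from userspace.  The local
 * struct mirrors SCHED_ATTR_SIZE_VER0; an "old" userspace simply passes its
 * smaller size and the kernel fills in only the fields that fit.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr_v0 {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr_v0 attr = { 0 };

	/* pid 0 = current task; last argument is the flags word */
	if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0) < 0) {
		perror("sched_getattr");
		return 1;
	}
	printf("policy=%u nice=%d size reported by kernel=%u\n",
	       attr.sched_policy, attr.sched_nice, attr.size);
	return 0;
}
```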
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 2305ce89a26c..46ed4e1383e2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -740,7 +740,7 @@ void vtime_account_system(struct task_struct *tsk)
write_seqcount_begin(&vtime->seqcount);
/* We might have scheduled out from guest path */
- if (current->flags & PF_VCPU)
+ if (tsk->flags & PF_VCPU)
vtime_account_guest(tsk, vtime);
else
__vtime_account_system(tsk, vtime);
@@ -783,7 +783,7 @@ void vtime_guest_enter(struct task_struct *tsk)
*/
write_seqcount_begin(&vtime->seqcount);
__vtime_account_system(tsk, vtime);
- current->flags |= PF_VCPU;
+ tsk->flags |= PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -794,7 +794,7 @@ void vtime_guest_exit(struct task_struct *tsk)
write_seqcount_begin(&vtime->seqcount);
vtime_account_guest(tsk, vtime);
- current->flags &= ~PF_VCPU;
+ tsk->flags &= ~PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
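
The three cputime hunks replace current with the tsk argument so the PF_VCPU test and updates apply to the task being accounted, which is not necessarily the task on the CPU at that moment. A tiny illustration of that bug class, with all names invented:

```c
/* A per-task helper must test the task it was handed, not the ambient
 * "current" task.  All names here are invented for the example.
 */
#include <assert.h>

#define PF_VCPU 0x1

struct task {
	unsigned int flags;
};

static struct task *cur;	/* stand-in for "current" */

/* buggy: decides based on whoever is running right now */
static int accounts_as_guest_buggy(struct task *tsk)
{
	(void)tsk;
	return cur->flags & PF_VCPU;
}

/* fixed: decides based on the task being accounted */
static int accounts_as_guest(struct task *tsk)
{
	return tsk->flags & PF_VCPU;
}

int main(void)
{
	struct task prev = { .flags = PF_VCPU };	/* was running guest code */
	struct task next = { .flags = 0 };

	/* Accounting can run for a task other than the one currently on the
	 * CPU (e.g. around a task switch) - that is the window the fix closes. */
	cur = &next;
	assert(!accounts_as_guest_buggy(&prev));	/* wrong answer */
	assert(accounts_as_guest(&prev));		/* right answer */
	return 0;
}
```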
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 83ab35e2374f..682a754ea3e1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4926,20 +4926,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (++count > 3) {
u64 new, old = ktime_to_ns(cfs_b->period);
- new = (old * 147) / 128; /* ~115% */
- new = min(new, max_cfs_quota_period);
-
- cfs_b->period = ns_to_ktime(new);
-
- /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
- cfs_b->quota *= new;
- cfs_b->quota = div64_u64(cfs_b->quota, old);
-
- pr_warn_ratelimited(
- "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
- smp_processor_id(),
- div_u64(new, NSEC_PER_USEC),
- div_u64(cfs_b->quota, NSEC_PER_USEC));
+ /*
+ * Grow period by a factor of 2 to avoid losing precision.
+ * Precision loss in the quota/period ratio can cause __cfs_schedulable
+ * to fail.
+ */
+ new = old * 2;
+ if (new < max_cfs_quota_period) {
+ cfs_b->period = ns_to_ktime(new);
+ cfs_b->quota *= 2;
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ } else {
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(old, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ }
/* reset count so we don't come right back in here */
count = 0;
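
The fair.c hunk doubles the period instead of scaling it by 147/128 so that the quota/period ratio survives integer division unchanged. A short demonstration of the difference, using invented example values:

```c
/* Demonstrates why the hunk above doubles period and quota instead of
 * scaling by ~115%: doubling preserves the quota/period ratio exactly,
 * while 147/128 scaling with integer division can drift.
 */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t period = 1000, quota = 700;	/* invented example values */

	/* old approach: new = old * 147 / 128, quota rescaled by the same factor */
	uint64_t p115 = period * 147 / 128;
	uint64_t q115 = quota * p115 / period;

	/* new approach: multiply both by 2 */
	uint64_t p2 = period * 2;
	uint64_t q2 = quota * 2;

	printf("original ratio:  %" PRIu64 "/%" PRIu64 " = %f\n",
	       quota, period, (double)quota / period);
	printf("147/128 scaling: %" PRIu64 "/%" PRIu64 " = %f (drifts)\n",
	       q115, p115, (double)q115 / p115);
	printf("doubling:        %" PRIu64 "/%" PRIu64 " = %f (preserved)\n",
	       q2, p2, (double)q2 / p2);
	return 0;
}
```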
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a39bed2c784f..168479a7d61b 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -174,7 +174,6 @@ static int membarrier_private_expedited(int flags)
*/
if (cpu == raw_smp_processor_id())
continue;
- rcu_read_lock();
p = rcu_dereference(cpu_rq(cpu)->curr);
if (p && p->mm == mm)
__cpumask_set_cpu(cpu, tmpmask);
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index c1f5bb590b5e..b5a65e212df2 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt)
*/
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
- int bc_moved;
/*
- * We try to cancel the timer first. If the callback is on
- * flight on some other cpu then we let it handle it. If we
- * were able to cancel the timer nothing can rearm it as we
- * own broadcast_lock.
+ * This is called either from enter/exit idle code or from the
+ * broadcast handler. In all cases tick_broadcast_lock is held.
*
- * However we can also be called from the event handler of
- * ce_broadcast_hrtimer itself when it expires. We cannot
- * restart the timer because we are in the callback, but we
- * can set the expiry time and let the callback return
- * HRTIMER_RESTART.
+ * hrtimer_cancel() cannot be called here, either from the
+ * broadcast handler or from the enter/exit idle code. The idle
+ * code can run into the problem described in bc_shutdown() and the
+ * broadcast handler cannot wait for itself to complete for obvious
+ * reasons.
*
- * Since we are in the idle loop at this point and because
- * hrtimer_{start/cancel} functions call into tracing,
- * calls to these functions must be bound within RCU_NONIDLE.
+ * Each caller tries to arm the hrtimer on its own CPU, but if the
+ * hrtimer callback function is currently running, then
+ * hrtimer_start() cannot move it and the timer stays on the CPU on
+ * which it is assigned at the moment.
+ *
+ * As this can be called from idle code, the hrtimer_start()
+ * invocation has to be wrapped with RCU_NONIDLE() as
+ * hrtimer_start() can call into tracing.
*/
- RCU_NONIDLE(
- {
- bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
- if (bc_moved) {
- hrtimer_start(&bctimer, expires,
- HRTIMER_MODE_ABS_PINNED_HARD);
- }
- }
- );
-
- if (bc_moved) {
- /* Bind the "device" to the cpu */
- bc->bound_on = smp_processor_id();
- } else if (bc->bound_on == smp_processor_id()) {
- hrtimer_set_expires(&bctimer, expires);
- }
+ RCU_NONIDLE( {
+ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
+ /*
+ * The core tick broadcast mode expects bc->bound_on to be set
+ * correctly to prevent a CPU which has the broadcast hrtimer
+ * armed from going deep idle.
+ *
+ * As tick_broadcast_lock is held, nothing can change the cpu
+ * base which was just established in hrtimer_start() above. So
+ * the below access is safe even without holding the hrtimer
+ * base lock.
+ */
+ bc->bound_on = bctimer.base->cpu_base->cpu;
+ } );
return 0;
}
@@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
{
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
- if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
- if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
- return HRTIMER_RESTART;
-
return HRTIMER_NORESTART;
}