summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/sockmap.c155
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/dma/Kconfig3
-rw-r--r--kernel/dma/direct.c4
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/events/hw_breakpoint.c13
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/jump_label.c2
-rw-r--r--kernel/locking/lockdep.c1
-rw-r--r--kernel/locking/mutex.c3
-rw-r--r--kernel/locking/test-ww_mutex.c2
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/printk/printk.c12
-rw-r--r--kernel/printk/printk_safe.c4
-rw-r--r--kernel/sched/debug.c6
-rw-r--r--kernel/sched/fair.c26
-rw-r--r--kernel/sched/topology.c5
-rw-r--r--kernel/sys.c3
-rw-r--r--kernel/time/clocksource.c40
-rw-r--r--kernel/trace/ring_buffer.c2
22 files changed, 205 insertions, 117 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2590700237c1..138f0302692e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1844,7 +1844,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
hdr = &btf->hdr;
cur = btf->nohdr_data + hdr->type_off;
- end = btf->nohdr_data + hdr->type_len;
+ end = cur + hdr->type_len;
env->log_type_id = 1;
while (cur < end) {
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index cf5195c7c331..0a0f2ec75370 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -132,6 +132,7 @@ struct smap_psock {
struct work_struct gc_work;
struct proto *sk_proto;
+ void (*save_unhash)(struct sock *sk);
void (*save_close)(struct sock *sk, long timeout);
void (*save_data_ready)(struct sock *sk);
void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
+static void bpf_tcp_unhash(struct sock *sk);
static void bpf_tcp_close(struct sock *sk, long timeout);
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
struct proto *base)
{
prot[SOCKMAP_BASE] = *base;
+ prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
prot[SOCKMAP_BASE].close = bpf_tcp_close;
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
return -EBUSY;
}
+ psock->save_unhash = sk->sk_prot->unhash;
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
@@ -236,7 +240,7 @@ static int bpf_tcp_init(struct sock *sk)
}
static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
static void bpf_tcp_release(struct sock *sk)
{
@@ -248,7 +252,7 @@ static void bpf_tcp_release(struct sock *sk)
goto out;
if (psock->cork) {
- free_start_sg(psock->sock, psock->cork);
+ free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
psock->cork = NULL;
}
@@ -305,39 +309,21 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
return e;
}
-static void bpf_tcp_close(struct sock *sk, long timeout)
+static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
{
- void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e;
struct sk_msg_buff *md, *mtmp;
- struct smap_psock *psock;
struct sock *osk;
- lock_sock(sk);
- rcu_read_lock();
- psock = smap_psock_sk(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
- }
-
- /* The psock may be destroyed anytime after exiting the RCU critial
- * section so by the time we use close_fun the psock may no longer
- * be valid. However, bpf_tcp_close is called with the sock lock
- * held so the close hook and sk are still valid.
- */
- close_fun = psock->save_close;
-
if (psock->cork) {
- free_start_sg(psock->sock, psock->cork);
+ free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
psock->cork = NULL;
}
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
- free_start_sg(psock->sock, md);
+ free_start_sg(psock->sock, md, true);
kfree(md);
}
@@ -369,7 +355,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
/* If another thread deleted this object skip deletion.
* The refcnt on psock may or may not be zero.
*/
- if (l) {
+ if (l && l == link) {
hlist_del_rcu(&link->hash_node);
smap_release_sock(psock, link->sk);
free_htab_elem(htab, link);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
kfree(e);
e = psock_map_pop(sk, psock);
}
+}
+
+static void bpf_tcp_unhash(struct sock *sk)
+{
+ void (*unhash_fun)(struct sock *sk);
+ struct smap_psock *psock;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->unhash)
+ sk->sk_prot->unhash(sk);
+ return;
+ }
+ unhash_fun = psock->save_unhash;
+ bpf_tcp_remove(sk, psock);
+ rcu_read_unlock();
+ unhash_fun(sk);
+}
+
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+ void (*close_fun)(struct sock *sk, long timeout);
+ struct smap_psock *psock;
+
+ lock_sock(sk);
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ release_sock(sk);
+ return sk->sk_prot->close(sk, timeout);
+ }
+ close_fun = psock->save_close;
+ bpf_tcp_remove(sk, psock);
rcu_read_unlock();
release_sock(sk);
close_fun(sk, timeout);
@@ -570,14 +592,16 @@ static void free_bytes_sg(struct sock *sk, int bytes,
md->sg_start = i;
}
-static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+static int free_sg(struct sock *sk, int start,
+ struct sk_msg_buff *md, bool charge)
{
struct scatterlist *sg = md->sg_data;
int i = start, free = 0;
while (sg[i].length) {
free += sg[i].length;
- sk_mem_uncharge(sk, sg[i].length);
+ if (charge)
+ sk_mem_uncharge(sk, sg[i].length);
if (!md->skb)
put_page(sg_page(&sg[i]));
sg[i].length = 0;
@@ -594,9 +618,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
return free;
}
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
{
- int free = free_sg(sk, md->sg_start, md);
+ int free = free_sg(sk, md->sg_start, md, charge);
md->sg_start = md->sg_end;
return free;
@@ -604,7 +628,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
{
- return free_sg(sk, md->sg_curr, md);
+ return free_sg(sk, md->sg_curr, md, true);
}
static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
@@ -718,7 +742,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
list_add_tail(&r->list, &psock->ingress);
sk->sk_data_ready(sk);
} else {
- free_start_sg(sk, r);
+ free_start_sg(sk, r, true);
kfree(r);
}
@@ -752,14 +776,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
release_sock(sk);
}
smap_release_sock(psock, sk);
- if (unlikely(err))
- goto out;
- return 0;
+ return err;
out_rcu:
rcu_read_unlock();
-out:
- free_bytes_sg(NULL, send, md, false);
- return err;
+ return 0;
}
static inline void bpf_md_init(struct smap_psock *psock)
@@ -822,7 +842,7 @@ more_data:
case __SK_PASS:
err = bpf_tcp_push(sk, send, m, flags, true);
if (unlikely(err)) {
- *copied -= free_start_sg(sk, m);
+ *copied -= free_start_sg(sk, m, true);
break;
}
@@ -845,16 +865,17 @@ more_data:
lock_sock(sk);
if (unlikely(err < 0)) {
- free_start_sg(sk, m);
+ int free = free_start_sg(sk, m, false);
+
psock->sg_size = 0;
if (!cork)
- *copied -= send;
+ *copied -= free;
} else {
psock->sg_size -= send;
}
if (cork) {
- free_start_sg(sk, m);
+ free_start_sg(sk, m, true);
psock->sg_size = 0;
kfree(m);
m = NULL;
@@ -912,6 +933,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
rcu_read_lock();
psock = smap_psock_sk(sk);
@@ -922,9 +945,6 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
rcu_read_unlock();
- if (!skb_queue_empty(&sk->sk_receive_queue))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
lock_sock(sk);
bytes_ready:
while (copied != len) {
@@ -1122,7 +1142,7 @@ wait_for_memory:
err = sk_stream_wait_memory(sk, &timeo);
if (err) {
if (m && m != psock->cork)
- free_start_sg(sk, m);
+ free_start_sg(sk, m, true);
goto out_err;
}
}
@@ -1464,10 +1484,16 @@ static void smap_destroy_psock(struct rcu_head *rcu)
schedule_work(&psock->gc_work);
}
+static bool psock_is_smap_sk(struct sock *sk)
+{
+ return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops;
+}
+
static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
if (refcount_dec_and_test(&psock->refcnt)) {
- tcp_cleanup_ulp(sock);
+ if (psock_is_smap_sk(sock))
+ tcp_cleanup_ulp(sock);
write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
@@ -1581,13 +1607,13 @@ static void smap_gc_work(struct work_struct *w)
bpf_prog_put(psock->bpf_tx_msg);
if (psock->cork) {
- free_start_sg(psock->sock, psock->cork);
+ free_start_sg(psock->sock, psock->cork, true);
kfree(psock->cork);
}
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
- free_start_sg(psock->sock, md);
+ free_start_sg(psock->sock, md, true);
kfree(md);
}
@@ -1894,6 +1920,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
* doesn't update user data.
*/
if (psock) {
+ if (!psock_is_smap_sk(sock)) {
+ err = -EBUSY;
+ goto out_progs;
+ }
if (READ_ONCE(psock->bpf_parse) && parse) {
err = -EBUSY;
goto out_progs;
@@ -2089,8 +2119,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state.
+ */
if (skops.sk->sk_type != SOCK_STREAM ||
- skops.sk->sk_protocol != IPPROTO_TCP) {
+ skops.sk->sk_protocol != IPPROTO_TCP ||
+ skops.sk->sk_state != TCP_ESTABLISHED) {
fput(socket->file);
return -EOPNOTSUPP;
}
@@ -2445,6 +2479,16 @@ static int sock_hash_update_elem(struct bpf_map *map,
return -EINVAL;
}
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state.
+ */
+ if (skops.sk->sk_type != SOCK_STREAM ||
+ skops.sk->sk_protocol != IPPROTO_TCP ||
+ skops.sk->sk_state != TCP_ESTABLISHED) {
+ fput(socket->file);
+ return -EOPNOTSUPP;
+ }
+
lock_sock(skops.sk);
preempt_disable();
rcu_read_lock();
@@ -2535,10 +2579,22 @@ const struct bpf_map_ops sock_hash_ops = {
.map_check_btf = map_check_no_btf,
};
+static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
+{
+ return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+ ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
struct bpf_map *, map, void *, key, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held());
+
+ /* ULPs are currently supported only for TCP sockets in ESTABLISHED
+ * state. This checks that the sock ops triggering the update is
+ * one indicating we are (or will be soon) in an ESTABLISHED state.
+ */
+ if (!bpf_is_valid_sock_op(bpf_sock))
+ return -EOPNOTSUPP;
return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
}
@@ -2557,6 +2613,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
struct bpf_map *, map, void *, key, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!bpf_is_valid_sock_op(bpf_sock))
+ return -EOPNOTSUPP;
return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 92246117d2b0..bb07e74b34a2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3163,7 +3163,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* an arbitrary scalar. Disallow all math except
* pointer subtraction
*/
- if (opcode == BPF_SUB){
+ if (opcode == BPF_SUB && env->allow_ptr_leaks) {
mark_reg_unknown(env, regs, insn->dst_reg);
return 0;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index aa7fe85ad62e..0097acec1c71 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -607,15 +607,15 @@ static void cpuhp_thread_fun(unsigned int cpu)
bool bringup = st->bringup;
enum cpuhp_state state;
+ if (WARN_ON_ONCE(!st->should_run))
+ return;
+
/*
* ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
* that if we see ->should_run we also see the rest of the state.
*/
smp_mb();
- if (WARN_ON_ONCE(!st->should_run))
- return;
-
cpuhp_lock_acquire(bringup);
if (st->single) {
@@ -916,7 +916,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
if (ret) {
st->target = prev_state;
- undo_cpu_down(cpu, st);
+ if (st->state < prev_state)
+ undo_cpu_down(cpu, st);
break;
}
}
@@ -969,7 +970,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, target);
- if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+ if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
cpuhp_reset_state(st, prev_state);
__cpuhp_kick_ap(st);
}
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9bd54304446f..1b1d63b3634b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -23,6 +23,9 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU
bool
select NEED_DMA_MAP_STATE
+config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
+ bool
+
config DMA_DIRECT_OPS
bool
depends on HAS_DMA
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 1c35b7b945d0..de87b0282e74 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -168,7 +168,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
int dma_direct_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_ZONE_DMA
- if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+ if (mask < phys_to_dma(dev, DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)))
return 0;
#else
/*
@@ -177,7 +177,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
* memory, or by providing a ZONE_DMA32. If neither is the case, the
* architecture needs to use an IOMMU instead of the direct mapping.
*/
- if (mask < DMA_BIT_MASK(32))
+ if (mask < phys_to_dma(dev, DMA_BIT_MASK(32)))
return 0;
#endif
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2a62b96600ad..dcb093e7b377 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2867,16 +2867,11 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
_perf_event_disable(bp);
err = modify_user_hw_breakpoint_check(bp, attr, true);
- if (err) {
- if (!bp->attr.disabled)
- _perf_event_enable(bp);
- return err;
- }
-
- if (!attr->disabled)
+ if (!bp->attr.disabled)
_perf_event_enable(bp);
- return 0;
+
+ return err;
}
static int perf_event_modify_attr(struct perf_event *event,
@@ -3940,6 +3935,12 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
goto out;
}
+ /* If this is a pinned event it must be running on this CPU */
+ if (event->attr.pinned && event->oncpu != smp_processor_id()) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/*
* If the event is currently on this CPU, its either a per-task event,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
@@ -5948,6 +5949,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
unsigned long sp;
unsigned int rem;
u64 dyn_size;
+ mm_segment_t fs;
/*
* We dump:
@@ -5965,7 +5967,10 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
/* Data. */
sp = perf_user_stack_pointer(regs);
+ fs = get_fs();
+ set_fs(USER_DS);
rem = __output_copy_user(handle, (void *) sp, dump_size);
+ set_fs(fs);
dyn_size = dump_size - rem;
perf_output_skip(handle, rem);
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b3814fce5ecb..d6b56180827c 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -509,6 +509,8 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *a
*/
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
+ int err;
+
/*
* modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
* will not be possible to raise IPIs that invoke __perf_event_disable.
@@ -520,15 +522,12 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
else
perf_event_disable(bp);
- if (!attr->disabled) {
- int err = modify_user_hw_breakpoint_check(bp, attr, false);
+ err = modify_user_hw_breakpoint_check(bp, attr, false);
- if (err)
- return err;
+ if (!bp->attr.disabled)
perf_event_enable(bp);
- bp->attr.disabled = 0;
- }
- return 0;
+
+ return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0..f0b58479534f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -550,8 +550,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
}
/* a new mm has just been created */
- arch_dup_mmap(oldmm, mm);
- retval = 0;
+ retval = arch_dup_mmap(oldmm, mm);
out:
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 01ebdf1f9f40..2e62503bea0d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -678,7 +678,7 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
case MODULE_STATE_COMING:
ret = jump_label_add_module(mod);
if (ret) {
- WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n");
+ WARN(1, "Failed to allocate memory: jump_label may not work properly.\n");
jump_label_del_module(mod);
}
break;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e406c5fdb41e..dd13f865ad40 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -55,7 +55,6 @@
#include "lockdep_internals.h"
-#include <trace/events/preemptirq.h>
#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 1a81a1257b3f..3f8a35104285 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -389,7 +389,7 @@ static bool __ww_mutex_wound(struct mutex *lock,
/*
* wake_up_process() paired with set_current_state()
* inserts sufficient barriers to make sure @owner either sees
- * it's wounded in __ww_mutex_lock_check_stamp() or has a
+ * it's wounded in __ww_mutex_check_kill() or has a
* wakeup pending to re-read the wounded state.
*/
if (owner != current)
@@ -946,7 +946,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}
debug_mutex_lock_common(lock, &waiter);
- debug_mutex_add_waiter(lock, &waiter, current);
lock_contended(&lock->dep_map, ip);
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 5b915b370d5a..0be047dbd897 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -324,7 +324,7 @@ static int __test_cycle(unsigned int nthreads)
if (!cycle->result)
continue;
- pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n",
+ pr_err("cyclic deadlock not resolved, ret[%d/%d] = %d\n",
n, nthreads, cycle->result);
ret = -EINVAL;
break;
diff --git a/kernel/pid.c b/kernel/pid.c
index de1cfc4f75a2..cdf63e53a014 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -195,7 +195,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
idr_preload_end();
if (nr < 0) {
- retval = nr;
+ retval = (nr == -ENOSPC) ? -EAGAIN : nr;
goto out_free;
}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fd6f8ed28e01..9bf5404397e0 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -351,7 +351,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
*/
enum log_flags {
- LOG_NOCONS = 1, /* suppress print, do not print to console */
LOG_NEWLINE = 2, /* text ended with a newline */
LOG_PREFIX = 4, /* text started with a prefix */
LOG_CONT = 8, /* text is a fragment of a continuation line */
@@ -1881,9 +1880,6 @@ int vprintk_store(int facility, int level,
if (dict)
lflags |= LOG_PREFIX|LOG_NEWLINE;
- if (suppress_message_printing(level))
- lflags |= LOG_NOCONS;
-
return log_output(facility, level, lflags,
dict, dictlen, text, text_len);
}
@@ -2032,6 +2028,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
const char *text, size_t len) {}
static size_t msg_print_text(const struct printk_log *msg,
bool syslog, char *buf, size_t size) { return 0; }
+static bool suppress_message_printing(int level) { return false; }
#endif /* CONFIG_PRINTK */
@@ -2368,10 +2365,11 @@ skip:
break;
msg = log_from_idx(console_idx);
- if (msg->flags & LOG_NOCONS) {
+ if (suppress_message_printing(msg->level)) {
/*
- * Skip record if !ignore_loglevel, and
- * record has level above the console loglevel.
+ * Skip record we have buffered and already printed
+ * directly to the console when we received it, and
+ * record that has level above the console loglevel.
*/
console_idx = log_next(console_idx);
console_seq++;
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0a74c533e4b..0913b4d385de 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -306,12 +306,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
return printk_safe_log_store(s, fmt, args);
}
-void printk_nmi_enter(void)
+void notrace printk_nmi_enter(void)
{
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}
-void printk_nmi_exit(void)
+void notrace printk_nmi_exit(void)
{
this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 60caf1fb94e0..6383aa6a60ca 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -89,12 +89,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
static void sched_feat_disable(int i)
{
- static_key_disable(&sched_feat_keys[i]);
+ static_key_disable_cpuslocked(&sched_feat_keys[i]);
}
static void sched_feat_enable(int i)
{
- static_key_enable(&sched_feat_keys[i]);
+ static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
static void sched_feat_disable(int i) { };
@@ -146,9 +146,11 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
+ cpus_read_lock();
inode_lock(inode);
ret = sched_feat_set(cmp);
inode_unlock(inode);
+ cpus_read_unlock();
if (ret < 0)
return ret;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b39fb596f6c1..f808ddf2a868 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3362,6 +3362,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
* @se: sched_entity to attach
+ * @flags: migration hints
*
* Must call update_cfs_rq_load_avg() before this, since we rely on
* cfs_rq->avg.last_update_time being current.
@@ -7263,6 +7264,7 @@ static void update_blocked_averages(int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq, *pos;
+ const struct sched_class *curr_class;
struct rq_flags rf;
bool done = true;
@@ -7299,8 +7301,10 @@ static void update_blocked_averages(int cpu)
if (cfs_rq_has_blocked(cfs_rq))
done = false;
}
- update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
/* Don't need periodic decay once load/util_avg are null */
if (others_have_blocked(rq))
@@ -7365,13 +7369,16 @@ static inline void update_blocked_averages(int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq = &rq->cfs;
+ const struct sched_class *curr_class;
struct rq_flags rf;
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
- update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
#ifdef CONFIG_NO_HZ_COMMON
rq->last_blocked_load_update_tick = jiffies;
@@ -7482,10 +7489,10 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
return load_idx;
}
-static unsigned long scale_rt_capacity(int cpu)
+static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+ unsigned long max = arch_scale_cpu_capacity(sd, cpu);
unsigned long used, free;
unsigned long irq;
@@ -7507,7 +7514,7 @@ static unsigned long scale_rt_capacity(int cpu)
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
- unsigned long capacity = scale_rt_capacity(cpu);
+ unsigned long capacity = scale_rt_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
@@ -8269,7 +8276,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(env, &sds);
- return sds.busiest;
+ return env->imbalance ? sds.busiest : NULL;
out_balanced:
env->imbalance = 0;
@@ -9638,7 +9645,8 @@ static inline bool vruntime_normalized(struct task_struct *p)
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_remote_wakeup))
return true;
return false;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 56a0fed30c0a..505a41c42b96 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1295,7 +1295,7 @@ static void init_numa_topology_type(void)
n = sched_max_numa_distance;
- if (sched_domains_numa_levels <= 1) {
+ if (sched_domains_numa_levels <= 2) {
sched_numa_topology_type = NUMA_DIRECT;
return;
}
@@ -1380,9 +1380,6 @@ void sched_init_numa(void)
break;
}
- if (!level)
- return;
-
/*
* 'level' contains the number of unique distances
*
diff --git a/kernel/sys.c b/kernel/sys.c
index cf5c67533ff1..123bd73046ec 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -71,9 +71,6 @@
#include <asm/io.h>
#include <asm/unistd.h>
-/* Hardening for Spectre-v1 */
-#include <linux/nospec.h>
-
#include "uid16.h"
#ifndef SET_UNALIGN_CTL
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f74fb00d8064..0e6e97a01942 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -133,19 +133,40 @@ static void inline clocksource_watchdog_unlock(unsigned long *flags)
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
+static int clocksource_watchdog_kthread(void *data);
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
+
/*
* Interval: 0.5sec Threshold: 0.0625s
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+static void clocksource_watchdog_work(struct work_struct *work)
+{
+ /*
+ * We cannot directly run clocksource_watchdog_kthread() here, because
+ * clocksource_select() calls timekeeping_notify() which uses
+ * stop_machine(). One cannot use stop_machine() from a workqueue() due
+ * lock inversions wrt CPU hotplug.
+ *
+ * Also, we only ever run this work once or twice during the lifetime
+ * of the kernel, so there is no point in creating a more permanent
+ * kthread for this.
+ *
+ * If kthread_run fails the next watchdog scan over the
+ * watchdog_list will find the unstable clock again.
+ */
+ kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
+}
+
static void __clocksource_unstable(struct clocksource *cs)
{
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
cs->flags |= CLOCK_SOURCE_UNSTABLE;
/*
- * If the clocksource is registered clocksource_watchdog_work() will
+ * If the clocksource is registered clocksource_watchdog_kthread() will
* re-rate and re-select.
*/
if (list_empty(&cs->list)) {
@@ -156,7 +177,7 @@ static void __clocksource_unstable(struct clocksource *cs)
if (cs->mark_unstable)
cs->mark_unstable(cs);
- /* kick clocksource_watchdog_work() */
+ /* kick clocksource_watchdog_kthread() */
if (finished_booting)
schedule_work(&watchdog_work);
}
@@ -166,7 +187,7 @@ static void __clocksource_unstable(struct clocksource *cs)
* @cs: clocksource to be marked unstable
*
* This function is called by the x86 TSC code to mark clocksources as unstable;
- * it defers demotion and re-selection to a work.
+ * it defers demotion and re-selection to a kthread.
*/
void clocksource_mark_unstable(struct clocksource *cs)
{
@@ -391,9 +412,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
}
}
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
-
-static int __clocksource_watchdog_work(void)
+static int __clocksource_watchdog_kthread(void)
{
struct clocksource *cs, *tmp;
unsigned long flags;
@@ -418,12 +437,13 @@ static int __clocksource_watchdog_work(void)
return select;
}
-static void clocksource_watchdog_work(struct work_struct *work)
+static int clocksource_watchdog_kthread(void *data)
{
mutex_lock(&clocksource_mutex);
- if (__clocksource_watchdog_work())
+ if (__clocksource_watchdog_kthread())
clocksource_select();
mutex_unlock(&clocksource_mutex);
+ return 0;
}
static bool clocksource_is_watchdog(struct clocksource *cs)
@@ -442,7 +462,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
-static inline int __clocksource_watchdog_work(void) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }
@@ -810,7 +830,7 @@ static int __init clocksource_done_booting(void)
/*
* Run the watchdog first to eliminate unstable clock sources
*/
- __clocksource_watchdog_work();
+ __clocksource_watchdog_kthread();
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d92d4a982fd..65bd4616220d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1546,6 +1546,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
tmp_iter_page = first_page;
do {
+ cond_resched();
+
to_remove_page = tmp_iter_page;
rb_inc_page(cpu_buffer, &tmp_iter_page);