diff options
Diffstat (limited to 'kernel')
123 files changed, 782 insertions, 376 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index ca9cb55b5855..5b1284370367 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -56,6 +56,8 @@ #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/uaccess.h> +#include <linux/sched/cputime.h> + #include <asm/div64.h> #include <linux/blkdev.h> /* sector_div */ #include <linux/pid_namespace.h> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ea87fb19a94..afe5bab376c9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,11 +13,12 @@ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> +#include <linux/rculist_nulls.h> #include "percpu_freelist.h" #include "bpf_lru_list.h" struct bucket { - struct hlist_head head; + struct hlist_nulls_head head; raw_spinlock_t lock; }; @@ -44,9 +45,14 @@ enum extra_elem_state { /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { - struct hlist_node hash_node; - struct bpf_htab *htab; - struct pcpu_freelist_node fnode; + struct hlist_nulls_node hash_node; + struct { + void *padding; + union { + struct bpf_htab *htab; + struct pcpu_freelist_node fnode; + }; + }; }; union { struct rcu_head rcu; @@ -162,7 +168,8 @@ skip_percpu_elems: offsetof(struct htab_elem, lru_node), htab->elem_size, htab->map.max_entries); else - pcpu_freelist_populate(&htab->freelist, htab->elems, + pcpu_freelist_populate(&htab->freelist, + htab->elems + offsetof(struct htab_elem, fnode), htab->elem_size, htab->map.max_entries); return 0; @@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) int err, i; u64 cost; + BUILD_BUG_ON(offsetof(struct htab_elem, htab) != + offsetof(struct htab_elem, hash_node.pprev)); + BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != + offsetof(struct htab_elem, hash_node.pprev)); + if (lru && !capable(CAP_SYS_ADMIN)) /* LRU implementation is much complicated than other * maps. Hence, limit to CAP_SYS_ADMIN for now. @@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; for (i = 0; i < htab->n_buckets; i++) { - INIT_HLIST_HEAD(&htab->buckets[i].head); + INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); } @@ -366,20 +378,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) return &htab->buckets[hash & (htab->n_buckets - 1)]; } -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) { return &__select_bucket(htab, hash)->head; } -static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, +/* this lookup function can only be called with bucket lock taken */ +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, void *key, u32 key_size) { + struct hlist_nulls_node *n; + struct htab_elem *l; + + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + return NULL; +} + +/* can be called without bucket lock. it will repeat the loop in + * the unlikely event when elements moved from one bucket into another + * while link list is being walked + */ +static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, + u32 hash, void *key, + u32 key_size, u32 n_buckets) +{ + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_rcu(l, head, hash_node) +again: + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l->hash == hash && !memcmp(&l->key, key, key_size)) return l; + if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) + goto again; + return NULL; } @@ -387,7 +423,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l; u32 hash, key_size; @@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) head = select_bucket(htab, hash); - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); return l; } @@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) { struct bpf_htab *htab = (struct bpf_htab *)arg; - struct htab_elem *l, *tgt_l; - struct hlist_head *head; + struct htab_elem *l = NULL, *tgt_l; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; @@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) raw_spin_lock_irqsave(&b->lock, flags); - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); break; } @@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; int i; @@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, hash); /* lookup the key */ - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); if (!l) { i = 0; @@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) } /* key was found, get next key in the same bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), struct htab_elem, hash_node); if (next_l) { @@ -500,7 +537,7 @@ find_first_elem: head = select_bucket(htab, i); /* pick first element in the bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), struct htab_elem, hash_node); if (next_l) { /* if it's not empty, just return it */ @@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, int err = 0; if (prealloc) { - l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); - if (!l_new) + struct pcpu_freelist_node *l; + + l = pcpu_freelist_pop(&htab->freelist); + if (!l) err = -E2BIG; + else + l_new = container_of(l, struct htab_elem, fnode); } else { if (atomic_inc_return(&htab->count) > htab->map.max_entries) { atomic_dec(&htab->count); @@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); free_htab_elem(htab, l_old); } ret = 0; @@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { bpf_lru_node_set_ref(&l_new->lru_node); - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); } ret = 0; @@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, ret = PTR_ERR(l_new); goto err; } - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); } ret = 0; err: @@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, } else { pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } ret = 0; @@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); ret = 0; } @@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); ret = 0; } @@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab) int i; for (i = 0; i < htab->n_buckets; i++) { - struct hlist_head *head = select_bucket(htab, i); - struct hlist_node *n; + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_safe(l, n, head, hash_node) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + hlist_nulls_del_rcu(&l->hash_node); if (l->state != HTAB_EXTRA_ELEM_USED) htab_elem_free(htab, l); } diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 8bfe0afaee10..b37bd9ab7f57 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -500,9 +500,15 @@ unlock: raw_spin_unlock(&trie->lock); } +static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + static const struct bpf_map_ops trie_ops = { .map_alloc = trie_alloc, .map_free = trie_free, + .map_get_next_key = trie_get_next_key, .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 461eb1e66a0f..7af0dcc5d755 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -13,6 +13,7 @@ #include <linux/bpf_trace.h> #include <linux/syscalls.h> #include <linux/slab.h> +#include <linux/sched/signal.h> #include <linux/vmalloc.h> #include <linux/mmzone.h> #include <linux/anon_inodes.h> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fc6e39b223e..796b68d00119 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -33,7 +33,7 @@ * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. * Since it's analyzing all pathes through the program, the length of the - * analysis is limited to 32k insn, which may be hit even if total number of + * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k * diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index fc34bcf2329f..1dc22f6b49f5 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -5,6 +5,9 @@ #include <linux/sort.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/sched/signal.h> +#include <linux/sched/task.h> +#include <linux/magic.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/delayacct.h> @@ -1326,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct task_struct *task; int count = 0; - seq_printf(seq, "css_set %p\n", cset); + seq_printf(seq, "css_set %pK\n", cset); list_for_each_entry(task, &cset->tasks, cg_list) { if (count++ > MAX_TASKS_SHOWN_PER_CSS) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e8f87bf9840c..48851327a15e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -41,6 +41,7 @@ #include <linux/proc_fs.h> #include <linux/rcupdate.h> #include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/percpu-rwsem.h> @@ -2668,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css) * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. The caller is responsible for - * cleaning up with cgroup_apply_control_disble(). + * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b3088886cd37..0f41292be0fb 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -44,6 +44,8 @@ #include <linux/proc_fs.h> #include <linux/rcupdate.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/seq_file.h> #include <linux/security.h> #include <linux/slab.h> diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index cff7ea62c38f..96d38dab6fb2 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -1,6 +1,6 @@ #include "cgroup-internal.h" -#include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/slab.h> #include <linux/nsproxy.h> #include <linux/proc_ns.h> diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 2bd673783f1a..2237201d66d5 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -214,7 +214,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) /* * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies - * on threadgroup_change_begin() held by the copy_process(). + * on cgroup_threadgroup_change_begin() held by the copy_process(). */ static int pids_can_fork(struct task_struct *task) { @@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task) /* Only log the first time events_limit is incremented. */ if (atomic64_inc_return(&pids->events_limit) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id)); + pr_cont_cgroup_path(css->cgroup); pr_cont("\n"); } cgroup_file_notify(&pids->events_file); diff --git a/kernel/cpu.c b/kernel/cpu.c index 0a5f630f5c54..f7c063239fa5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -7,7 +7,9 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/notifier.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/hotplug.h> +#include <linux/sched/task.h> #include <linux/unistd.h> #include <linux/cpu.h> #include <linux/oom.h> diff --git a/kernel/cred.c b/kernel/cred.c index 5f264fb5737d..2bc66075740f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -12,6 +12,7 @@ #include <linux/cred.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/coredump.h> #include <linux/key.h> #include <linux/keyctl.h> #include <linux/init_task.h> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 79517e5549f1..65c0f1363788 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include <linux/init.h> #include <linux/kgdb.h> #include <linux/kdb.h> +#include <linux/nmi.h> #include <linux/pid.h> #include <linux/smp.h> #include <linux/mm.h> @@ -232,9 +233,9 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) int i; for (i = 0; i < VMACACHE_SIZE; i++) { - if (!current->vmacache[i]) + if (!current->vmacache.vmas[i]) continue; - flush_cache_range(current->vmacache[i], + flush_cache_range(current->vmacache.vmas[i], addr, addr + BREAK_INSTR_SIZE); } } diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 19d9a578c753..7510dc687c0d 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -29,6 +29,7 @@ */ #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/kgdb.h> #include <linux/kdb.h> #include <linux/serial_core.h> diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index fe15fff5df53..6ad4a9fcbd6f 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -12,7 +12,8 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/kdb.h> #include <linux/nmi.h> #include "kdb_private.h" diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ca183919d302..c8146d53ca67 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -18,6 +18,9 @@ #include <linux/kmsg_dump.h> #include <linux/reboot.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> +#include <linux/sched/stat.h> +#include <linux/sched/debug.h> #include <linux/sysrq.h> #include <linux/smp.h> #include <linux/utsname.h> diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 660549656991..4a1c33416b6a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,8 @@ */ #include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/sched/cputime.h> #include <linux/slab.h> #include <linux/taskstats.h> #include <linux/time.h> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index e9fdb5203de5..c04917cad1bf 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -11,6 +11,8 @@ #include <linux/perf_event.h> #include <linux/slab.h> +#include <linux/sched/task_stack.h> + #include "internal.h" struct callchain_cpus_entries { diff --git a/kernel/events/core.c b/kernel/events/core.c index 7298e149b732..6e75a5c9412d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,10 @@ #include <linux/filter.h> #include <linux/namei.h> #include <linux/parser.h> +#include <linux/sched/clock.h> +#include <linux/sched/mm.h> +#include <linux/proc_ns.h> +#include <linux/mount.h> #include "internal.h" @@ -377,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -996,7 +1001,7 @@ list_update_cgroup_event(struct perf_event *event, */ #define PERF_CPU_HRTIMER (1000 / HZ) /* - * function must be called with interrupts disbled + * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { @@ -3989,6 +3994,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6489,6 +6496,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6591,6 +6599,132 @@ void perf_event_comm(struct task_struct *task, bool exec) } /* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[NR_NAMESPACES]; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + if (ret) + return; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + struct perf_ns_link_info *ns_link_info; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + .nr_namespaces = NR_NAMESPACES, + /* .link_info[NR_NAMESPACES] */ + }, + }; + + ns_link_info = namespaces_event.event_id.link_info; + + perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + +/* * mmap tracking */ @@ -9144,6 +9278,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9689,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 257fa460b846..2831480c63a2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->paused = 1; } +void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) +{ + /* + * OVERWRITE is determined by perf_aux_output_end() and can't + * be passed in directly. + */ + if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE)) + return; + + handle->aux_flags |= flags; +} +EXPORT_SYMBOL_GPL(perf_aux_output_flag); + /* * This is called before hardware starts writing to the AUX area to * obtain an output handle and make sure there's room in the buffer. @@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, handle->event = event; handle->head = aux_head; handle->size = 0; + handle->aux_flags = 0; /* * In overwrite mode, AUX data stores do not depend on aux_tail, @@ -408,34 +422,32 @@ err: * of the AUX buffer management code is that after pmu::stop(), the AUX * transaction must be stopped and therefore drop the AUX reference count. */ -void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { + bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED); struct ring_buffer *rb = handle->rb; - bool wakeup = truncated; unsigned long aux_head; - u64 flags = 0; - - if (truncated) - flags |= PERF_AUX_FLAG_TRUNCATED; /* in overwrite mode, driver provides aux_head via handle */ if (rb->aux_overwrite) { - flags |= PERF_AUX_FLAG_OVERWRITE; + handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; local_set(&rb->aux_head, aux_head); } else { + handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; + aux_head = local_read(&rb->aux_head); local_add(size, &rb->aux_head); } - if (size || flags) { + if (size || handle->aux_flags) { /* * Only send RECORD_AUX if we have something useful to communicate */ - perf_event_aux_event(handle->event, aux_head, size, flags); + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); } aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); @@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, } if (wakeup) { - if (truncated) + if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) handle->event->pending_disable = 1; perf_output_wakeup(handle); } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d630f8ac4d2f..0e137f98a50c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -27,6 +27,8 @@ #include <linux/pagemap.h> /* read_mapping_page */ #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> #include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ #include <linux/mmu_notifier.h> /* set_pte_at_notify */ diff --git a/kernel/exit.c b/kernel/exit.c index 8a768a3672a5..516acdb0e0ec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,12 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/mm.h> +#include <linux/sched/stat.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/capability.h> @@ -548,7 +554,6 @@ static void exit_mm(void) enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); - userfaultfd_exit(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); diff --git a/kernel/fork.c b/kernel/fork.c index 246bf9aaf9df..afa2947286cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -12,6 +12,16 @@ */ #include <linux/slab.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> +#include <linux/sched/user.h> +#include <linux/sched/numa_balancing.h> +#include <linux/sched/stat.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> +#include <linux/rtmutex.h> #include <linux/init.h> #include <linux/unistd.h> #include <linux/module.h> @@ -1455,6 +1465,21 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) task->pids[type].pid = pid; } +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; +#endif /* #ifdef CONFIG_TASKS_RCU */ +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -1746,7 +1771,7 @@ static __latent_entropy struct task_struct *copy_process( INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - threadgroup_change_begin(current); + cgroup_threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted the the new process's css_set can be changed @@ -1843,7 +1868,7 @@ static __latent_entropy struct task_struct *copy_process( proc_fork_connector(p); cgroup_post_fork(p); - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); @@ -1854,7 +1879,7 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: @@ -2327,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } + perf_event_namespaces(current); + bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); diff --git a/kernel/futex.c b/kernel/futex.c index b687cb22301c..229a744b1781 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,8 @@ #include <linux/nsproxy.h> #include <linux/ptrace.h> #include <linux/sched/rt.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/mm.h> #include <linux/hugetlb.h> #include <linux/freezer.h> #include <linux/bootmem.h> diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 40c07e4fa116..f0f8e2a8496f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,9 @@ #include <linux/export.h> #include <linux/sysctl.h> #include <linux/utsname.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> + #include <trace/events/sched.h> /* diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 944d068b6c48..a4afe5cc5af1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,8 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/rt.h> +#include <linux/sched/task.h> +#include <uapi/linux/sched/types.h> #include <linux/task_work.h> #include "internals.h" diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b56a558e406d..b118735fea9d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image) ret = crypto_shash_final(desc, digest); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha_regions", - sha_regions, sha_region_sz, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); if (ret) goto out_free_digest; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 4cef7e4706b0..799a8a452187 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image, extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE -struct kexec_sha_region { - unsigned long start; - unsigned long len; -}; - +#include <linux/purgatory.h> void kimage_file_post_load_cleanup(struct kimage *image); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } diff --git a/kernel/kmod.c b/kernel/kmod.c index 0c407f905ca4..563f97e2be36 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -20,6 +20,8 @@ */ #include <linux/module.h> #include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/binfmts.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/kmod.h> diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 699c5bc51a92..d733479a10ee 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr) * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, + const char *symbol_name, unsigned int offset) { - kprobe_opcode_t *addr = p->addr; - - if ((p->symbol_name && p->addr) || - (!p->symbol_name && !p->addr)) + if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; - if (p->symbol_name) { - kprobe_lookup_name(p->symbol_name, addr); + if (symbol_name) { + kprobe_lookup_name(symbol_name, addr); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); + addr = (kprobe_opcode_t *)(((char *)addr) + offset); if (addr) return addr; @@ -1413,6 +1411,11 @@ invalid: return ERR_PTR(-EINVAL); } +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +{ + return _kprobe_addr(p->addr, p->symbol_name, p->offset); +} + /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { @@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num) } EXPORT_SYMBOL_GPL(unregister_kprobes); -int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int __weak kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { return NOTIFY_DONE; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); static struct notifier_block kprobe_exceptions_nb = { .notifier_call = kprobe_exceptions_notify, @@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); +bool __weak arch_function_offset_within_entry(unsigned long offset) +{ + return !offset; +} + +bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +{ + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + + if (IS_ERR(kp_addr)) + return false; + + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || + !arch_function_offset_within_entry(offset)) + return false; + + return true; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; @@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp) int i; void *addr; + if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) + return -EINVAL; + if (kretprobe_blacklist_size) { addr = kprobe_addr(&rp->kp); if (IS_ERR(addr)) diff --git a/kernel/kthread.c b/kernel/kthread.c index 8461a4372e8a..2f26adea0f84 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -5,7 +5,9 @@ * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ +#include <uapi/linux/sched/types.h> #include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/kthread.h> #include <linux/completion.h> #include <linux/err.h> diff --git a/kernel/latencytop.c b/kernel/latencytop.c index b5c30d9f46c5..96b4179cee6a 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -55,6 +55,8 @@ #include <linux/latencytop.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/stat.h> #include <linux/list.h> #include <linux/stacktrace.h> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..a95e5d1f4a9c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,8 @@ #define DISABLE_BRANCH_PROFILING #include <linux/mutex.h> #include <linux/sched.h> +#include <linux/sched/clock.h> +#include <linux/sched/task.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -3260,10 +3262,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 28350dc8ecbb..f24582d4dad3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -32,6 +32,8 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <uapi/linux/sched/types.h> +#include <linux/rtmutex.h> #include <linux/atomic.h> #include <linux/moduleparam.h> #include <linux/delay.h> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index ad2d9e22697b..198527a62149 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -19,8 +19,10 @@ */ #include <linux/mutex.h> #include <linux/ww_mutex.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/export.h> #include <linux/spinlock.h> #include <linux/interrupt.h> diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e852be4851fc..4a30ef63c607 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include <linux/debugfs.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/fs.h> static const char * const qstat_names[qstat_num + 1] = { diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 62b6cee8ea7f..97ee9df32e0f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -18,6 +18,7 @@ */ #include <linux/sched.h> #include <linux/sched/rt.h> +#include <linux/sched/debug.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/spinlock.h> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d340be3a488f..6edc32ecd9c5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -12,9 +12,11 @@ */ #include <linux/spinlock.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> #include <linux/sched/deadline.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/timer.h> #include "rtmutex_common.h" diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 990134617b4c..856dfff5c33a 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -13,6 +13,7 @@ #define __KERNEL_RTMUTEX_COMMON_H #include <linux/rtmutex.h> +#include <linux/sched/wake_q.h> /* * This is the control structure for tasks blocked on a rt_mutex, diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 5eacab880f67..7bc24d477805 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -6,7 +6,8 @@ * - Derived also from comments by Linus */ #include <linux/rwsem.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/export.h> enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2ad8d8dc3bb1..34e727f18e49 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -10,10 +10,12 @@ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes. */ #include <linux/rwsem.h> -#include <linux/sched.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/osq_lock.h> #include "rwsem.h" diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 45ba475d4be3..90a74ccd85a4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/export.h> #include <linux/rwsem.h> #include <linux/atomic.h> diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 9512e37637dc..561acdd39960 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/semaphore.h> #include <linux/spinlock.h> #include <linux/ftrace.h> diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f..6b7abb334ca6 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e59eed..f6c5d330059a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include <linux/file.h> #include <linux/syscalls.h> #include <linux/cgroup.h> +#include <linux/perf_event.h> static struct kmem_cache *nsproxy_cachep; @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } switch_task_namespaces(tsk, new_nsproxy); + + perf_event_namespaces(tsk); out: fput(file); return err; diff --git a/kernel/panic.c b/kernel/panic.c index 3ec16e603e88..a58932b41700 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -9,6 +9,7 @@ * to indicate a major problem. */ #include <linux/debug_locks.h> +#include <linux/sched/debug.h> #include <linux/interrupt.h> #include <linux/kmsg_dump.h> #include <linux/kallsyms.h> diff --git a/kernel/pid.c b/kernel/pid.c index 0291804151b5..0143ac0ddceb 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -38,6 +38,7 @@ #include <linux/syscalls.h> #include <linux/proc_ns.h> #include <linux/proc_fs.h> +#include <linux/sched/task.h> #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eef2ce968636..de461aa0bf9a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,12 +12,15 @@ #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/syscalls.h> +#include <linux/cred.h> #include <linux/err.h> #include <linux/acct.h> #include <linux/slab.h> #include <linux/proc_ns.h> #include <linux/reboot.h> #include <linux/export.h> +#include <linux/sched/task.h> +#include <linux/sched/signal.h> struct pid_cache { int nr_ids; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 86385af1080f..a8b978c35a6a 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -10,6 +10,8 @@ * This file is released under the GPLv2. */ +#define pr_fmt(fmt) "PM: " fmt + #include <linux/export.h> #include <linux/suspend.h> #include <linux/syscalls.h> @@ -21,6 +23,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include <linux/nmi.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> @@ -104,7 +107,7 @@ EXPORT_SYMBOL(system_entering_hibernation); #ifdef CONFIG_PM_DEBUG static void hibernation_debug_sleep(void) { - printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + pr_info("hibernation debug: Waiting for 5 seconds.\n"); mdelay(5000); } @@ -250,10 +253,9 @@ void swsusp_show_speed(ktime_t start, ktime_t stop, centisecs = 1; /* avoid div-by-zero */ k = nr_pages * (PAGE_SIZE / 1024); kps = (k * 100) / centisecs; - printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", - msg, k, - centisecs / 100, centisecs % 100, - kps / 1000, (kps % 1000) / 10); + pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", + msg, k, centisecs / 100, centisecs % 100, kps / 1000, + (kps % 1000) / 10); } /** @@ -271,8 +273,7 @@ static int create_image(int platform_mode) error = dpm_suspend_end(PMSG_FREEZE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some devices failed to power down, aborting hibernation\n"); return error; } @@ -288,8 +289,7 @@ static int create_image(int platform_mode) error = syscore_suspend(); if (error) { - printk(KERN_ERR "PM: Some system devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some system devices failed to power down, aborting hibernation\n"); goto Enable_irqs; } @@ -304,8 +304,8 @@ static int create_image(int platform_mode) restore_processor_state(); trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); if (error) - printk(KERN_ERR "PM: Error %d creating hibernation image\n", - error); + pr_err("Error %d creating hibernation image\n", error); + if (!in_suspend) { events_check_enabled = false; clear_free_pages(); @@ -432,8 +432,7 @@ static int resume_target_kernel(bool platform_mode) error = dpm_suspend_end(PMSG_QUIESCE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting resume\n"); + pr_err("Some devices failed to power down, aborting resume\n"); return error; } @@ -608,6 +607,22 @@ static void power_down(void) { #ifdef CONFIG_SUSPEND int error; + + if (hibernation_mode == HIBERNATION_SUSPEND) { + error = suspend_devices_and_enter(PM_SUSPEND_MEM); + if (error) { + hibernation_mode = hibernation_ops ? + HIBERNATION_PLATFORM : + HIBERNATION_SHUTDOWN; + } else { + /* Restore swap signature. */ + error = swsusp_unmark(); + if (error) + pr_err("Swap will be unusable! Try swapon -a.\n"); + + return; + } + } #endif switch (hibernation_mode) { @@ -620,32 +635,13 @@ static void power_down(void) if (pm_power_off) kernel_power_off(); break; -#ifdef CONFIG_SUSPEND - case HIBERNATION_SUSPEND: - error = suspend_devices_and_enter(PM_SUSPEND_MEM); - if (error) { - if (hibernation_ops) - hibernation_mode = HIBERNATION_PLATFORM; - else - hibernation_mode = HIBERNATION_SHUTDOWN; - power_down(); - } - /* - * Restore swap signature. - */ - error = swsusp_unmark(); - if (error) - printk(KERN_ERR "PM: Swap will be unusable! " - "Try swapon -a.\n"); - return; -#endif } kernel_halt(); /* * Valid image is on the disk, if we continue we risk serious data * corruption after resume. */ - printk(KERN_CRIT "PM: Please power down manually\n"); + pr_crit("Power down manually\n"); while (1) cpu_relax(); } @@ -655,7 +651,7 @@ static int load_image_and_restore(void) int error; unsigned int flags; - pr_debug("PM: Loading hibernation image.\n"); + pr_debug("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); @@ -667,7 +663,7 @@ static int load_image_and_restore(void) if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); - printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + pr_err("Failed to load hibernation image, recovering.\n"); swsusp_free(); free_basic_memory_bitmaps(); Unlock: @@ -685,7 +681,7 @@ int hibernate(void) bool snapshot_test = false; if (!hibernation_available()) { - pr_debug("PM: Hibernation not available.\n"); + pr_debug("Hibernation not available.\n"); return -EPERM; } @@ -703,9 +699,9 @@ int hibernate(void) goto Exit; } - printk(KERN_INFO "PM: Syncing filesystems ... "); + pr_info("Syncing filesystems ... \n"); sys_sync(); - printk("done.\n"); + pr_info("done.\n"); error = freeze_processes(); if (error) @@ -731,7 +727,7 @@ int hibernate(void) else flags |= SF_CRC32_MODE; - pr_debug("PM: writing image.\n"); + pr_debug("Writing image.\n"); error = swsusp_write(flags); swsusp_free(); if (!error) { @@ -743,7 +739,7 @@ int hibernate(void) in_suspend = 0; pm_restore_gfp_mask(); } else { - pr_debug("PM: Image restored successfully.\n"); + pr_debug("Image restored successfully.\n"); } Free_bitmaps: @@ -751,7 +747,7 @@ int hibernate(void) Thaw: unlock_device_hotplug(); if (snapshot_test) { - pr_debug("PM: Checking hibernation image\n"); + pr_debug("Checking hibernation image\n"); error = swsusp_check(); if (!error) error = load_image_and_restore(); @@ -815,10 +811,10 @@ static int software_resume(void) goto Unlock; } - pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + pr_debug("Checking hibernation image partition %s\n", resume_file); if (resume_delay) { - printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + pr_info("Waiting %dsec before reading resume device ...\n", resume_delay); ssleep(resume_delay); } @@ -857,10 +853,10 @@ static int software_resume(void) } Check_image: - pr_debug("PM: Hibernation image partition %d:%d present\n", + pr_debug("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - pr_debug("PM: Looking for hibernation image.\n"); + pr_debug("Looking for hibernation image.\n"); error = swsusp_check(); if (error) goto Unlock; @@ -879,7 +875,7 @@ static int software_resume(void) goto Close_Finish; } - pr_debug("PM: Preparing processes for restore.\n"); + pr_debug("Preparing processes for restore.\n"); error = freeze_processes(); if (error) goto Close_Finish; @@ -892,7 +888,7 @@ static int software_resume(void) /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); - pr_debug("PM: Hibernation image not present or could not be loaded.\n"); + pr_debug("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: swsusp_close(FMODE_READ); @@ -1016,7 +1012,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, error = -EINVAL; if (!error) - pr_debug("PM: Hibernation mode set to '%s'\n", + pr_debug("Hibernation mode set to '%s'\n", hibernation_modes[mode]); unlock_system_sleep(); return error ? error : n; @@ -1052,7 +1048,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, lock_system_sleep(); swsusp_resume_device = res; unlock_system_sleep(); - printk(KERN_INFO "PM: Starting manual resume from disk\n"); + pr_info("Starting manual resume from disk\n"); noresume = 0; software_resume(); return n; diff --git a/kernel/power/process.c b/kernel/power/process.c index 2fba066e125f..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -12,6 +12,8 @@ #include <linux/oom.h> #include <linux/suspend.h> #include <linux/module.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> #include <linux/syscalls.h> #include <linux/freezer.h> #include <linux/delay.h> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 905d5bbd595f..d79a38de425a 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -22,6 +22,7 @@ #include <linux/device.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/nmi.h> #include <linux/syscalls.h> #include <linux/console.h> #include <linux/highmem.h> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 34da86e73d00..2984fb0f0257 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,9 @@ #include <linux/utsname.h> #include <linux/ctype.h> #include <linux/uio.h> +#include <linux/sched/clock.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> #include <linux/uaccess.h> #include <asm/sections.h> diff --git a/kernel/profile.c b/kernel/profile.c index f67ce0aa6bc4..9aa2a4445b0d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -25,6 +25,8 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/sched/stat.h> + #include <asm/sections.h> #include <asm/irq_regs.h> #include <asm/ptrace.h> diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49ba7c1ade9d..0af928712174 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -10,6 +10,9 @@ #include <linux/capability.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> +#include <linux/sched/task.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/highmem.h> diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 123ccbd22449..a4a86fb47e4a 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -30,6 +30,7 @@ #include <linux/rcupdate.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <uapi/linux/sched/types.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/completion.h> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d81345be730e..cccc417a8135 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -32,7 +32,8 @@ #include <linux/smp.h> #include <linux/rcupdate.h> #include <linux/interrupt.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <uapi/linux/sched/types.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/completion.h> diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e773129c8b08..ef3bcfb15b39 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -30,7 +30,7 @@ #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/preempt.h> -#include <linux/rcupdate.h> +#include <linux/rcupdate_wait.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/delay.h> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index fa6a48d3917b..6ad330dbbae2 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -25,7 +25,7 @@ #include <linux/completion.h> #include <linux/interrupt.h> #include <linux/notifier.h> -#include <linux/rcupdate.h> +#include <linux/rcupdate_wait.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/mutex.h> @@ -47,6 +47,18 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" +void rcu_barrier_bh(void) +{ + wait_rcu_gp(call_rcu_bh); +} +EXPORT_SYMBOL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + wait_rcu_gp(call_rcu_sched); +} +EXPORT_SYMBOL(rcu_barrier_sched); + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d80e0d2f68c6..50fee7689e71 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -32,9 +32,10 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/smp.h> -#include <linux/rcupdate.h> +#include <linux/rcupdate_wait.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/nmi.h> #include <linux/atomic.h> #include <linux/bitops.h> @@ -49,6 +50,7 @@ #include <linux/kernel_stat.h> #include <linux/wait.h> #include <linux/kthread.h> +#include <uapi/linux/sched/types.h> #include <linux/prefetch.h> #include <linux/delay.h> #include <linux/stop_machine.h> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index b60f2b6caa14..ec62a05bfdb3 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -24,6 +24,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> +#include <linux/rtmutex.h> #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/seqlock.h> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a240f3308be6..0a62a8f1caac 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -27,7 +27,9 @@ #include <linux/delay.h> #include <linux/gfp.h> #include <linux/oom.h> +#include <linux/sched/debug.h> #include <linux/smpboot.h> +#include <uapi/linux/sched/types.h> #include "../time/tick-internal.h" #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9e03db9ea9c0..55c8530316c7 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -36,7 +36,8 @@ #include <linux/spinlock.h> #include <linux/smp.h> #include <linux/interrupt.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/percpu.h> @@ -49,6 +50,7 @@ #include <linux/moduleparam.h> #include <linux/kthread.h> #include <linux/tick.h> +#include <linux/rcupdate_wait.h> #define CREATE_TRACE_POINTS diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 890c95f2587a..ce40c810cd5c 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h @@ -2,6 +2,7 @@ #include <linux/kref.h> #include <linux/rwsem.h> +#include <linux/sched/autogroup.h> struct autogroup { /* diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722..a08795e21628 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,8 @@ #include <linux/percpu.h> #include <linux/ktime.h> #include <linux/sched.h> +#include <linux/nmi.h> +#include <linux/sched/clock.h> #include <linux/static_key.h> #include <linux/workqueue.h> #include <linux/compiler.h> diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index f063a25d4449..53f9558fa925 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -11,7 +11,8 @@ * Waiting for completion is a typically sync point, but not an exclusion point. */ -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/completion.h> /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b49..3b31fc05a0f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,10 +6,15 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include <linux/sched.h> +#include <linux/sched/clock.h> +#include <uapi/linux/sched/types.h> +#include <linux/sched/loadavg.h> +#include <linux/sched/hotplug.h> #include <linux/cpuset.h> #include <linux/delayacct.h> #include <linux/init_task.h> #include <linux/context_tracking.h> +#include <linux/rcupdate_wait.h> #include <linux/blkdev.h> #include <linux/kprobes.h> @@ -981,7 +986,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_ return rq; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return rq; rq = move_queued_task(rq, p, dest_cpu); @@ -1259,10 +1264,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task))) + if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task))) + if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1303,10 +1308,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task))) + if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) + if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1490,14 +1495,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? */ - for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { + for_each_cpu(dest_cpu, &p->cpus_allowed) { if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) @@ -1549,10 +1554,10 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { lockdep_assert_held(&p->pi_lock); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(tsk_cpus_allowed(p)); + cpu = cpumask_any(&p->cpus_allowed); /* * In order not to call set_task_cpu() on a blocking task we need @@ -1564,7 +1569,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -3211,6 +3216,15 @@ static inline void preempt_latency_start(int val) { } static inline void preempt_latency_stop(int val) { } #endif +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT + return p->preempt_disable_ip; +#else + return 0; +#endif +} + /* * Print scheduling while atomic bug: */ @@ -3273,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; @@ -5233,6 +5252,9 @@ void sched_show_task(struct task_struct *p) int ppid; unsigned long state = p->state; + /* Make sure the string lines up properly with the number of task states: */ + BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + if (!try_get_task_stack(p)) return; if (state) @@ -5461,7 +5483,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) return -EINVAL; /* TODO: This is not properly updating schedstats */ diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index e73119013c53..fba235c7d026 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) { + cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { best_cpu = cpumask_any(later_mask); goto out; - } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) && + } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { best_cpu = cpudl_maximum(cp); if (later_mask) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index fd4659313640..cd7cd489f739 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,7 @@ #include <linux/cpufreq.h> #include <linux/kthread.h> +#include <uapi/linux/sched/types.h> #include <linux/slab.h> #include <trace/events/power.h> @@ -35,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -51,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -115,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -135,19 +136,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -212,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -266,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -579,6 +579,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -589,7 +590,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 11e9705bf937..981fcd7dc394 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ecec3a4f1ee..f3778e2b46c8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,12 +4,8 @@ #include <linux/kernel_stat.h> #include <linux/static_key.h> #include <linux/context_tracking.h> -#include <linux/cputime.h> +#include <linux/sched/cputime.h> #include "sched.h" -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#endif - #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27737f34757d..99b2c33a9fbc 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory++; update_dl_migration(dl_rq); @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory--; update_dl_migration(dl_rq); @@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online cpu. */ - cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); + cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. @@ -958,7 +958,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) enqueue_dl_entity(&p->dl, pi_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1032,9 +1032,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) * try to make it stay here, it might be important. */ if (unlikely(dl_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &curr->dl)) && - (tsk_nr_cpus_allowed(p) > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_later_rq(p); if (target != -1 && @@ -1055,7 +1055,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) return; @@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. */ - if (tsk_nr_cpus_allowed(p) != 1 && + if (p->nr_cpus_allowed != 1 && cpudl_find(&rq->rd->cpudl, p, NULL) != -1) return; @@ -1178,7 +1178,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1) + if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1235,7 +1235,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1279,7 +1279,7 @@ static int find_later_rq(struct task_struct *task) if (unlikely(!later_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* @@ -1384,8 +1384,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { @@ -1425,7 +1424,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!dl_task(p)); @@ -1464,7 +1463,7 @@ retry: */ if (dl_task(rq->curr) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && - tsk_nr_cpus_allowed(rq->curr) > 1) { + rq->curr->nr_cpus_allowed > 1) { resched_curr(rq); return 0; } @@ -1611,9 +1610,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && dl_task(rq->curr) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &rq->curr->dl))) { push_dl_tasks(rq); } @@ -1727,7 +1726,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded) + if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); #endif if (dl_task(rq->curr)) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 109adc0e9cb9..38f019324f1a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,7 +11,8 @@ */ #include <linux/proc_fs.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> #include <linux/utsname.h> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce..dea138964b91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,9 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/topology.h> + #include <linux/latencytop.h> #include <linux/cpumask.h> #include <linux/cpuidle.h> @@ -1551,7 +1553,7 @@ static void task_numa_compare(struct task_numa_env *env, */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur))) + if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) goto unlock; /* @@ -1661,7 +1663,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(env->p))) + if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) continue; env->dst_cpu = cpu; @@ -5458,7 +5460,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - tsk_cpus_allowed(p))) + &p->cpus_allowed)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5578,7 +5580,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) return cpumask_first(sched_group_cpus(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { + for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5717,7 +5719,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p)); + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); for_each_cpu_wrap(core, cpus, target, wrap) { bool idle = true; @@ -5751,7 +5753,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) return cpu; @@ -5797,13 +5799,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) break; @@ -5958,7 +5960,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + && cpumask_test_cpu(cpu, &p->cpus_allowed); } rcu_read_lock(); @@ -6698,7 +6700,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { + if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -6718,7 +6720,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) { + if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -7252,7 +7254,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to tsk_cpus_allowed() constraints. + * groups is inadequate due to ->cpus_allowed constraints. * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. @@ -8211,8 +8213,7 @@ more_balance: * if the curr task on busiest cpu can't be * moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - tsk_cpus_allowed(busiest->curr))) { + if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6a4bae0a649d..ac6d5176463d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -2,6 +2,7 @@ * Generic entry point for the idle threads */ #include <linux/sched.h> +#include <linux/sched/idle.h> #include <linux/cpu.h> #include <linux/cpuidle.h> #include <linux/cpuhotplug.h> diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index a2d6eb71f06b..7296b7308eca 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -7,6 +7,7 @@ */ #include <linux/export.h> +#include <linux/sched/loadavg.h> #include "sched.h" diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e8836cfc4cdb..9f3e40226dec 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -335,7 +335,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -352,7 +352,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { int target = find_lowest_rq(p); @@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) return; @@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. */ - if (tsk_nr_cpus_allowed(p) != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1726,8 +1726,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { @@ -1762,7 +1761,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!rt_task(p)); @@ -2122,9 +2121,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && (dl_task(rq->curr) || rt_task(rq->curr)) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -2197,7 +2196,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) */ if (task_on_rq_queued(p) && rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded) + if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 71b10a9b73cf..5cbf92214ad8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,9 +1,26 @@ #include <linux/sched.h> +#include <linux/sched/autogroup.h> #include <linux/sched/sysctl.h> +#include <linux/sched/topology.h> #include <linux/sched/rt.h> -#include <linux/u64_stats_sync.h> #include <linux/sched/deadline.h> +#include <linux/sched/clock.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/signal.h> +#include <linux/sched/numa_balancing.h> +#include <linux/sched/mm.h> +#include <linux/sched/cpufreq.h> +#include <linux/sched/stat.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> +#include <linux/sched/hotplug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> +#include <linux/sched/init.h> + +#include <linux/u64_stats_sync.h> #include <linux/kernel_stat.h> #include <linux/binfmts.h> #include <linux/mutex.h> @@ -13,6 +30,10 @@ #include <linux/tick.h> #include <linux/slab.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif + #include "cpupri.h" #include "cpudeadline.h" #include "cpuacct.h" @@ -1817,7 +1838,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); - #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index bf0da0aa0a14..d5710651043b 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -164,114 +164,3 @@ sched_info_switch(struct rq *rq, #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ - -/* - * The following are functions that support scheduler-internal time accounting. - * These functions are generally called at the timer tick. None of this depends - * on CONFIG_SCHEDSTATS. - */ - -/** - * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running - * - * @tsk: Pointer to target task. - */ -#ifdef CONFIG_POSIX_TIMERS -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - - /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) - return NULL; - - /* - * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime - * in __exit_signal(), we won't account to the signal struct further - * cputime consumed by that task, even though the task can still be - * ticking after __exit_signal(). - * - * In order to keep a consistent behaviour between thread group cputime - * and thread group cputimer accounting, lets also ignore the cputime - * elapsing after __exit_signal() in any thread group timer running. - * - * This makes sure that POSIX CPU clocks and timers are synchronized, so - * that a POSIX CPU timer won't expire while the corresponding POSIX CPU - * clock delta is behind the expiring timer value. - */ - if (unlikely(!tsk->sighand)) - return NULL; - - return cputimer; -} -#else -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - return NULL; -} -#endif - -/** - * account_group_user_time - Maintain utime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the utime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void account_group_user_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.utime); -} - -/** - * account_group_system_time - Maintain stime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the stime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void account_group_system_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.stime); -} - -/** - * account_group_exec_runtime - Maintain exec runtime for a thread group. - * - * @tsk: Pointer to task structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of the thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. - */ -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); -} diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 82f0dff90030..3d5610dcce11 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,4 @@ -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/swait.h> void __init_swait_queue_head(struct swait_queue_head *q, const char *name, diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9453efe9b25a..b8c84c6dee64 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -5,7 +5,8 @@ */ #include <linux/init.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/mm.h> #include <linux/wait.h> #include <linux/hash.h> @@ -241,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_event); +/* + * Note! These two wait functions are entered with the + * wait-queue lock held (and interrupts off in the _irq + * case), so there is no race with testing the wakeup + * condition in the caller before they add the wait + * entry to the wake queue. + */ +int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock(&wq->lock); + schedule(); + spin_lock(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr); + +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock_irq(&wq->lock); + schedule(); + spin_lock_irq(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr_irq); + /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e15185c28de5..65f61077ad50 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include <linux/compat.h> #include <linux/coredump.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/seccomp.h> #include <linux/slab.h> #include <linux/syscalls.h> diff --git a/kernel/signal.c b/kernel/signal.c index 214a8feeb771..7e59ebc2c25e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -13,7 +13,12 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/init.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/user.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> #include <linux/fs.h> #include <linux/tty.h> #include <linux/binfmts.h> @@ -2395,11 +2400,11 @@ void exit_signals(struct task_struct *tsk) * @tsk is about to have PF_EXITING set - lock out users which * expect stable threadgroup. */ - threadgroup_change_begin(tsk); + cgroup_threadgroup_change_begin(tsk); if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); return; } @@ -2410,7 +2415,7 @@ void exit_signals(struct task_struct *tsk) */ tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); if (!signal_pending(tsk)) goto out; diff --git a/kernel/smp.c b/kernel/smp.c index 77fcdb9f2775..a817769b53c0 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -17,6 +17,7 @@ #include <linux/smp.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/sched/idle.h> #include <linux/hypervisor.h> #include "smpboot.h" diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 4a5c6e73ecd4..1d71c051a951 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/export.h> #include <linux/percpu.h> #include <linux/kthread.h> diff --git a/kernel/sys.c b/kernel/sys.c index b07adca97ea3..7ff6d1b10cec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,13 @@ #include <linux/binfmts.h> #include <linux/sched.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/loadavg.h> +#include <linux/sched/stat.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> +#include <linux/sched/task.h> +#include <linux/sched/cputime.h> #include <linux/rcupdate.h> #include <linux/uidgid.h> #include <linux/cred.h> diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bb260ceb3718..acf0a5a06da7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -63,6 +63,7 @@ #include <linux/capability.h> #include <linux/binfmts.h> #include <linux/sched/sysctl.h> +#include <linux/sched/coredump.h> #include <linux/kexec.h> #include <linux/bpf.h> #include <linux/mount.h> diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e6dc9a538efa..ce3a31e8eb36 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -19,6 +19,8 @@ #include <linux/hrtimer.h> #include <linux/timerqueue.h> #include <linux/rtc.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/alarmtimer.h> #include <linux/mutex.h> #include <linux/platform_device.h> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e11d8d9f419..ec08f527d7ee 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -43,10 +43,12 @@ #include <linux/seq_file.h> #include <linux/err.h> #include <linux/debugobjects.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/sysctl.h> #include <linux/sched/rt.h> #include <linux/sched/deadline.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> #include <linux/timer.h> #include <linux/freezer.h> diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index a95f13c31464..087d6a1279b8 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,6 +10,8 @@ #include <linux/interrupt.h> #include <linux/syscalls.h> #include <linux/time.h> +#include <linux/sched/signal.h> +#include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/hrtimer.h> #include <trace/events/timer.h> diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7906b3f0c41a..497719127bf9 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second) shift_hz += cycles_per_tick/2; do_div(shift_hz, cycles_per_tick); /* Calculate nsec_per_tick using shift_hz */ - nsec_per_tick = (u64)TICK_NSEC << 8; + nsec_per_tick = (u64)NSEC_PER_SEC << 8; nsec_per_tick += (u32)shift_hz/2; do_div(nsec_per_tick, (u32)shift_hz); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b4377a5e4269..4513ad16a253 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,8 @@ * Implement CPU time clocks for the POSIX clock interface. */ -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/errno.h> #include <linux/math64.h> diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1e6623d76750..50a6a47020de 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> +#include <linux/sched/task.h> #include <linux/uaccess.h> #include <linux/list.h> diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..ea6b610c4c57 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/syscore_ops.h> #include <linux/hrtimer.h> #include <linux/sched_clock.h> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab397..7fe53be86077 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -17,8 +17,12 @@ #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/percpu.h> +#include <linux/nmi.h> #include <linux/profile.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/clock.h> +#include <linux/sched/stat.h> +#include <linux/sched/nohz.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 95b258dd75db..5b63a2102c29 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -14,7 +14,9 @@ #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/nmi.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/syscore_ops.h> #include <linux/clocksource.h> #include <linux/jiffies.h> diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 82a6bfa0c307..1dc0256bfb6e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -38,8 +38,10 @@ #include <linux/tick.h> #include <linux/kallsyms.h> #include <linux/irq_work.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/sysctl.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> #include <linux/slab.h> #include <linux/compat.h> diff --git a/kernel/torture.c b/kernel/torture.c index 01a99976f072..55de96529287 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -30,6 +30,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/completion.h> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d4a06e714645..9619b5768e4b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -455,7 +455,7 @@ config UPROBE_EVENTS select UPROBES select PROBE_EVENTS select TRACING - default n + default y help This allows the user to add tracing events on top of userspace dynamic events (similar to tracepoints) on the fly via the trace diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c0609326391..b9691ee8f6c1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -15,6 +15,7 @@ #include <linux/stop_machine.h> #include <linux/clocksource.h> +#include <linux/sched/task.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/suspend.h> @@ -4415,16 +4416,24 @@ static int __init set_graph_notrace_function(char *str) } __setup("ftrace_graph_notrace=", set_graph_notrace_function); +static int __init set_graph_max_depth_function(char *str) +{ + if (!str) + return 0; + fgraph_max_depth = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); + static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; char *func; struct ftrace_hash *hash; - if (enable) - hash = ftrace_graph_hash; - else - hash = ftrace_graph_notrace_hash; + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + if (WARN_ON(!hash)) + return; while (buf) { func = strsep(&buf, ","); @@ -4434,6 +4443,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); } + + if (enable) + ftrace_graph_hash = hash; + else + ftrace_graph_notrace_hash = hash; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -5487,7 +5501,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, * Normally the mcount trampoline will call the ops->func, but there * are times that it should not. For example, if the ops does not * have its own recursion protection, then it should call the - * ftrace_ops_recurs_func() instead. + * ftrace_ops_assist_func() instead. * * Returns the function that the trampoline should call for @ops. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a85739efcc30..96fc3c043ad6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6,6 +6,7 @@ #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> +#include <linux/sched/clock.h> #include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/irq_work.h> diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 6df9a83e20d7..c190a4d5013c 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -6,6 +6,7 @@ #include <linux/ring_buffer.h> #include <linux/completion.h> #include <linux/kthread.h> +#include <uapi/linux/sched/types.h> #include <linux/module.h> #include <linux/ktime.h> #include <asm/local.h> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f35109514a01..0ed834d6beb0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4355,6 +4355,7 @@ static const char readme_msg[] = "\t -:[<group>/]<event>\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" + "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n" #endif #ifdef CONFIG_UPROBE_EVENTS "\t place: <path>:<offset>\n" diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 0f06532a755b..5fdc779f411d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/ktime.h> #include <linux/trace_clock.h> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a1..1c21d0e2a145 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/stacktrace.h> +#include <linux/rculist.h> #include "tracing_map.h" #include "trace.h" diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 6721a1e89f39..f2ac9d44f6c4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,7 @@ #include <linux/ctype.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/rculist.h> #include "trace.h" diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index edfacd954e1b..21ea6ae77d93 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -44,6 +44,7 @@ #include <linux/uaccess.h> #include <linux/cpumask.h> #include <linux/delay.h> +#include <linux/sched/clock.h> #include "trace.h" static struct trace_array *hwlat_trace; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eadd96ef772f..013f4e7146d4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/rculist.h> #include "trace_probe.h" @@ -680,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv) return -EINVAL; } if (isdigit(argv[1][0])) { - if (is_return) { - pr_info("Return probe point must be a symbol.\n"); - return -EINVAL; - } /* an address specified */ ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { @@ -699,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Failed to parse symbol.\n"); return ret; } - if (offset && is_return) { - pr_info("Return probe must be used without offset.\n"); + if (offset && is_return && + !function_offset_within_entry(NULL, symbol, offset)) { + pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 070866c32eb9..02a4aeb22c47 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,8 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/ftrace.h> +#include <linux/sched/clock.h> +#include <linux/sched/mm.h> #include "trace_output.h" diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b0f86ea77881..cb917cebae29 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,5 +1,6 @@ /* Include in trace.c */ +#include <uapi/linux/sched/types.h> #include <linux/stringify.h> #include <linux/kthread.h> #include <linux/delay.h> diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2a1abbaca10e..5fb1f2c87e6b 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -2,6 +2,7 @@ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> * */ +#include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> @@ -64,7 +65,7 @@ void stack_trace_print(void) } /* - * When arch-specific code overides this function, the following + * When arch-specific code overrides this function, the following * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. * stack_trace_index[] diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f4379e772171..a7581fec9681 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -24,6 +24,7 @@ #include <linux/uprobes.h> #include <linux/namei.h> #include <linux/string.h> +#include <linux/rculist.h> #include "trace_probe.h" diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1f9a31f934a4..685c50ae6300 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,7 +24,8 @@ #include <linux/tracepoint.h> #include <linux/err.h> #include <linux/slab.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/task.h> #include <linux/static_key.h> extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 5c21f0535056..370724b45391 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -17,7 +17,9 @@ */ #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> +#include <linux/sched/cputime.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/jiffies.h> diff --git a/kernel/ucount.c b/kernel/ucount.c index 8a11fc0cb459..b4eeee03934f 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,7 @@ #include <linux/stat.h> #include <linux/sysctl.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/hash.h> #include <linux/user_namespace.h> @@ -143,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -154,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -164,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) diff --git a/kernel/uid16.c b/kernel/uid16.c index 71645ae9303a..5c2dc5b2bf4f 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/highuid.h> #include <linux/security.h> +#include <linux/cred.h> #include <linux/syscalls.h> #include <linux/uaccess.h> diff --git a/kernel/user.c b/kernel/user.c index b069ccbfb0b0..00281add65b2 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/key.h> +#include <linux/sched/user.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/user_namespace.h> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86b7854fec8e..2f735cbe05e8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/slab.h> +#include <linux/sched/signal.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> #include <linux/highuid.h> diff --git a/kernel/utsname.c b/kernel/utsname.c index 6976cd47dcf6..913fe4336d2b 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -14,8 +14,10 @@ #include <linux/utsname.h> #include <linux/err.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> +#include <linux/sched/task.h> static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) { diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index c8eac43267e9..233cd8fc6910 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -14,6 +14,7 @@ #include <linux/utsname.h> #include <linux/sysctl.h> #include <linux/wait.h> +#include <linux/rwsem.h> #ifdef CONFIG_PROC_SYSCTL diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 63177be0159e..03e0b69bb5bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -19,8 +19,11 @@ #include <linux/sysctl.h> #include <linux/smpboot.h> #include <linux/sched/rt.h> +#include <uapi/linux/sched/types.h> #include <linux/tick.h> #include <linux/workqueue.h> +#include <linux/sched/clock.h> +#include <linux/sched/debug.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index b5de262a9eb9..54a427d1f344 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -13,6 +13,8 @@ #include <linux/nmi.h> #include <linux/module.h> +#include <linux/sched/debug.h> + #include <asm/irq_regs.h> #include <linux/perf_event.h> diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 072cbc9b175d..c0168b7da1ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); |