summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile12
-rw-r--r--kernel/auditsc.c4
-rw-r--r--kernel/events/core.c114
-rw-r--r--kernel/events/ring_buffer.c6
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/hung_task.c21
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/kcov.c273
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/memremap.c27
-rw-r--r--kernel/panic.c20
-rw-r--r--kernel/power/suspend.c3
-rw-r--r--kernel/profile.c4
-rw-r--r--kernel/ptrace.c5
-rw-r--r--kernel/rcu/Makefile4
-rw-r--r--kernel/sched/Makefile4
-rw-r--r--kernel/sched/core.c36
-rw-r--r--kernel/sched/cpuacct.c35
-rw-r--r--kernel/sched/cpuacct.h4
-rw-r--r--kernel/sched/fair.c39
-rw-r--r--kernel/sched/sched.h13
-rw-r--r--kernel/seccomp.c4
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/time/tick-sched.c8
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/ftrace.c48
-rw-r--r--kernel/trace/trace.c76
-rw-r--r--kernel/trace/trace.h113
-rw-r--r--kernel/trace/trace_events_filter.c12
-rw-r--r--kernel/trace/trace_events_trigger.c88
-rw-r--r--kernel/trace/trace_functions.c6
-rw-r--r--kernel/trace/trace_functions_graph.c6
-rw-r--r--kernel/trace/trace_irqsoff.c9
-rw-r--r--kernel/trace/trace_kprobe.c27
-rw-r--r--kernel/trace/trace_mmiotrace.c2
-rw-r--r--kernel/trace/trace_nop.c4
-rw-r--r--kernel/trace/trace_output.c10
-rw-r--r--kernel/trace/trace_printk.c3
-rw-r--r--kernel/trace/trace_probe.c4
-rw-r--r--kernel/trace/trace_stat.c3
-rw-r--r--kernel/trace/trace_syscalls.c11
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--kernel/tracepoint.c2
45 files changed, 793 insertions, 291 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index baa55e50a315..f0c40bf49d9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,6 +18,17 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
endif
+# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
+# in coverage traces.
+KCOV_INSTRUMENT_softirq.o := n
+# These are called from save_stack_trace() on slub debug path,
+# and produce insane amounts of uninteresting coverage.
+KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_extable.o := n
+# Don't self-instrument.
+KCOV_INSTRUMENT_kcov.o := n
+KASAN_SANITIZE_kcov.o := n
+
# cond_syscall is currently not LTO compatible
CFLAGS_sys_ni.o = $(DISABLE_LTO)
@@ -68,6 +79,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
+obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 195ffaee50b9..7d0e3cf8abe1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2412,8 +2412,8 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld arch=%x syscall=%ld compat=%d ip=0x%lx code=0x%x",
- signr, syscall_get_arch(), syscall, is_compat_task(),
- KSTK_EIP(current), code);
+ signr, syscall_get_arch(), syscall,
+ in_compat_syscall(), KSTK_EIP(current), code);
audit_log_end(ab);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 712570dddacd..de24fbce5277 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -376,8 +376,11 @@ static void update_perf_cpu_limits(void)
u64 tmp = perf_sample_period_ns;
tmp *= sysctl_perf_cpu_time_max_percent;
- do_div(tmp, 100);
- ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+ tmp = div_u64(tmp, 100);
+ if (!tmp)
+ tmp = 1;
+
+ WRITE_ONCE(perf_sample_allowed_ns, tmp);
}
static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -409,7 +412,13 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
if (ret || !write)
return ret;
- update_perf_cpu_limits();
+ if (sysctl_perf_cpu_time_max_percent == 100) {
+ printk(KERN_WARNING
+ "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
+ WRITE_ONCE(perf_sample_allowed_ns, 0);
+ } else {
+ update_perf_cpu_limits();
+ }
return 0;
}
@@ -423,62 +432,68 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
#define NR_ACCUMULATED_SAMPLES 128
static DEFINE_PER_CPU(u64, running_sample_length);
+static u64 __report_avg;
+static u64 __report_allowed;
+
static void perf_duration_warn(struct irq_work *w)
{
- u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
- u64 avg_local_sample_len;
- u64 local_samples_len;
-
- local_samples_len = __this_cpu_read(running_sample_length);
- avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
printk_ratelimited(KERN_WARNING
- "perf interrupt took too long (%lld > %lld), lowering "
- "kernel.perf_event_max_sample_rate to %d\n",
- avg_local_sample_len, allowed_ns >> 1,
- sysctl_perf_event_sample_rate);
+ "perf: interrupt took too long (%lld > %lld), lowering "
+ "kernel.perf_event_max_sample_rate to %d\n",
+ __report_avg, __report_allowed,
+ sysctl_perf_event_sample_rate);
}
static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
void perf_sample_event_took(u64 sample_len_ns)
{
- u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
- u64 avg_local_sample_len;
- u64 local_samples_len;
+ u64 max_len = READ_ONCE(perf_sample_allowed_ns);
+ u64 running_len;
+ u64 avg_len;
+ u32 max;
- if (allowed_ns == 0)
+ if (max_len == 0)
return;
- /* decay the counter by 1 average sample */
- local_samples_len = __this_cpu_read(running_sample_length);
- local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
- local_samples_len += sample_len_ns;
- __this_cpu_write(running_sample_length, local_samples_len);
+ /* Decay the counter by 1 average sample. */
+ running_len = __this_cpu_read(running_sample_length);
+ running_len -= running_len/NR_ACCUMULATED_SAMPLES;
+ running_len += sample_len_ns;
+ __this_cpu_write(running_sample_length, running_len);
/*
- * note: this will be biased artifically low until we have
- * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
+ * Note: this will be biased artifically low until we have
+ * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
* from having to maintain a count.
*/
- avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
- if (avg_local_sample_len <= allowed_ns)
+ avg_len = running_len/NR_ACCUMULATED_SAMPLES;
+ if (avg_len <= max_len)
return;
- if (max_samples_per_tick <= 1)
- return;
+ __report_avg = avg_len;
+ __report_allowed = max_len;
- max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
- sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
- perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
+ /*
+ * Compute a throttle threshold 25% below the current duration.
+ */
+ avg_len += avg_len / 4;
+ max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
+ if (avg_len < max)
+ max /= (u32)avg_len;
+ else
+ max = 1;
- update_perf_cpu_limits();
+ WRITE_ONCE(perf_sample_allowed_ns, avg_len);
+ WRITE_ONCE(max_samples_per_tick, max);
+
+ sysctl_perf_event_sample_rate = max * HZ;
+ perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
if (!irq_work_queue(&perf_duration_work)) {
- early_printk("perf interrupt took too long (%lld > %lld), lowering "
+ early_printk("perf: interrupt took too long (%lld > %lld), lowering "
"kernel.perf_event_max_sample_rate to %d\n",
- avg_local_sample_len, allowed_ns >> 1,
+ __report_avg, __report_allowed,
sysctl_perf_event_sample_rate);
}
}
@@ -4210,6 +4225,14 @@ static void __perf_event_period(struct perf_event *event,
active = (event->state == PERF_EVENT_STATE_ACTIVE);
if (active) {
perf_pmu_disable(ctx->pmu);
+ /*
+ * We could be throttled; unthrottle now to avoid the tick
+ * trying to unthrottle while we already re-started the event.
+ */
+ if (event->hw.interrupts == MAX_INTERRUPTS) {
+ event->hw.interrupts = 0;
+ perf_log_throttle(event, 1);
+ }
event->pmu->stop(event, PERF_EF_UPDATE);
}
@@ -9426,10 +9449,29 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
+ /*
+ * This must be done before the CPU comes alive, because the
+ * moment we can run tasks we can encounter (software) events.
+ *
+ * Specifically, someone can have inherited events on kthreadd
+ * or a pre-existing worker thread that gets re-bound.
+ */
perf_event_init_cpu(cpu);
break;
case CPU_DOWN_PREPARE:
+ /*
+ * This must be done before the CPU dies because after that an
+ * active event might want to IPI the CPU and that'll not work
+ * so great for dead CPUs.
+ *
+ * XXX smp_call_function_single() return -ENXIO without a warn
+ * so we could possibly deal with this.
+ *
+ * This is safe against new events arriving because
+ * sys_perf_event_open() serializes against hotplug using
+ * get_online_cpus().
+ */
perf_event_exit_cpu(cpu);
break;
default:
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 1faad2cfdb9e..c61f0cbd308b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -746,8 +746,10 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
- rb->page_order = ilog2(nr_pages);
- rb->nr_pages = !!nr_pages;
+ if (nr_pages) {
+ rb->nr_pages = 1;
+ rb->page_order = ilog2(nr_pages);
+ }
ring_buffer_init(rb, watermark, flags);
diff --git a/kernel/exit.c b/kernel/exit.c
index 10e088237fed..953d1a1c0387 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,7 @@
#include <linux/oom.h>
#include <linux/writeback.h>
#include <linux/shm.h>
+#include <linux/kcov.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -655,6 +656,7 @@ void do_exit(long code)
TASKS_RCU(int tasks_rcu_i);
profile_task_exit(tsk);
+ kcov_task_exit(tsk);
WARN_ON(blk_needs_flush_plug(tsk));
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b8d1e7ceeea..d277e83ed3e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -75,6 +75,7 @@
#include <linux/aio.h>
#include <linux/compiler.h>
#include <linux/sysctl.h>
+#include <linux/kcov.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -392,6 +393,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
account_kernel_stack(ti, 1);
+ kcov_task_init(tsk);
+
return tsk;
free_ti:
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index e0f90c2b57aa..d234022805dc 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -185,10 +185,12 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
rcu_read_unlock();
}
-static unsigned long timeout_jiffies(unsigned long timeout)
+static long hung_timeout_jiffies(unsigned long last_checked,
+ unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
- return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+ return timeout ? last_checked - jiffies + timeout * HZ :
+ MAX_SCHEDULE_TIMEOUT;
}
/*
@@ -224,18 +226,21 @@ EXPORT_SYMBOL_GPL(reset_hung_task_detector);
*/
static int watchdog(void *dummy)
{
+ unsigned long hung_last_checked = jiffies;
+
set_user_nice(current, 0);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
+ long t = hung_timeout_jiffies(hung_last_checked, timeout);
- while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
- timeout = sysctl_hung_task_timeout_secs;
-
- if (atomic_xchg(&reset_hung_task, 0))
+ if (t <= 0) {
+ if (!atomic_xchg(&reset_hung_task, 0))
+ check_hung_uninterruptible_tasks(timeout);
+ hung_last_checked = jiffies;
continue;
-
- check_hung_uninterruptible_tasks(timeout);
+ }
+ schedule_timeout_interruptible(t);
}
return 0;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 64731e84c982..cc1cc641d653 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1322,8 +1322,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (nmsk != omsk)
/* hope the handler works with current trigger mode */
- pr_warning("irq %d uses trigger mode %u; requested %u\n",
- irq, nmsk, omsk);
+ pr_warn("irq %d uses trigger mode %u; requested %u\n",
+ irq, nmsk, omsk);
}
*old_ptr = new;
diff --git a/kernel/kcov.c b/kernel/kcov.c
new file mode 100644
index 000000000000..3efbee0834a8
--- /dev/null
+++ b/kernel/kcov.c
@@ -0,0 +1,273 @@
+#define pr_fmt(fmt) "kcov: " fmt
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/kcov.h>
+
+/*
+ * kcov descriptor (one per opened debugfs file).
+ * State transitions of the descriptor:
+ * - initial state after open()
+ * - then there must be a single ioctl(KCOV_INIT_TRACE) call
+ * - then, mmap() call (several calls are allowed but not useful)
+ * - then, repeated enable/disable for a task (only one task a time allowed)
+ */
+struct kcov {
+ /*
+ * Reference counter. We keep one for:
+ * - opened file descriptor
+ * - task with enabled coverage (we can't unwire it from another task)
+ */
+ atomic_t refcount;
+ /* The lock protects mode, size, area and t. */
+ spinlock_t lock;
+ enum kcov_mode mode;
+ /* Size of arena (in long's for KCOV_MODE_TRACE). */
+ unsigned size;
+ /* Coverage buffer shared with user space. */
+ void *area;
+ /* Task for which we collect coverage, or NULL. */
+ struct task_struct *t;
+};
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void __sanitizer_cov_trace_pc(void)
+{
+ struct task_struct *t;
+ enum kcov_mode mode;
+
+ t = current;
+ /*
+ * We are interested in code coverage as a function of a syscall inputs,
+ * so we ignore code executed in interrupts.
+ */
+ if (!t || in_interrupt())
+ return;
+ mode = READ_ONCE(t->kcov_mode);
+ if (mode == KCOV_MODE_TRACE) {
+ unsigned long *area;
+ unsigned long pos;
+
+ /*
+ * There is some code that runs in interrupts but for which
+ * in_interrupt() returns false (e.g. preempt_schedule_irq()).
+ * READ_ONCE()/barrier() effectively provides load-acquire wrt
+ * interrupts, there are paired barrier()/WRITE_ONCE() in
+ * kcov_ioctl_locked().
+ */
+ barrier();
+ area = t->kcov_area;
+ /* The first word is number of subsequent PCs. */
+ pos = READ_ONCE(area[0]) + 1;
+ if (likely(pos < t->kcov_size)) {
+ area[pos] = _RET_IP_;
+ WRITE_ONCE(area[0], pos);
+ }
+ }
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
+
+static void kcov_get(struct kcov *kcov)
+{
+ atomic_inc(&kcov->refcount);
+}
+
+static void kcov_put(struct kcov *kcov)
+{
+ if (atomic_dec_and_test(&kcov->refcount)) {
+ vfree(kcov->area);
+ kfree(kcov);
+ }
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+ t->kcov_mode = KCOV_MODE_DISABLED;
+ t->kcov_size = 0;
+ t->kcov_area = NULL;
+ t->kcov = NULL;
+}
+
+void kcov_task_exit(struct task_struct *t)
+{
+ struct kcov *kcov;
+
+ kcov = t->kcov;
+ if (kcov == NULL)
+ return;
+ spin_lock(&kcov->lock);
+ if (WARN_ON(kcov->t != t)) {
+ spin_unlock(&kcov->lock);
+ return;
+ }
+ /* Just to not leave dangling references behind. */
+ kcov_task_init(t);
+ kcov->t = NULL;
+ spin_unlock(&kcov->lock);
+ kcov_put(kcov);
+}
+
+static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+ int res = 0;
+ void *area;
+ struct kcov *kcov = vma->vm_file->private_data;
+ unsigned long size, off;
+ struct page *page;
+
+ area = vmalloc_user(vma->vm_end - vma->vm_start);
+ if (!area)
+ return -ENOMEM;
+
+ spin_lock(&kcov->lock);
+ size = kcov->size * sizeof(unsigned long);
+ if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 ||
+ vma->vm_end - vma->vm_start != size) {
+ res = -EINVAL;
+ goto exit;
+ }
+ if (!kcov->area) {
+ kcov->area = area;
+ vma->vm_flags |= VM_DONTEXPAND;
+ spin_unlock(&kcov->lock);
+ for (off = 0; off < size; off += PAGE_SIZE) {
+ page = vmalloc_to_page(kcov->area + off);
+ if (vm_insert_page(vma, vma->vm_start + off, page))
+ WARN_ONCE(1, "vm_insert_page() failed");
+ }
+ return 0;
+ }
+exit:
+ spin_unlock(&kcov->lock);
+ vfree(area);
+ return res;
+}
+
+static int kcov_open(struct inode *inode, struct file *filep)
+{
+ struct kcov *kcov;
+
+ kcov = kzalloc(sizeof(*kcov), GFP_KERNEL);
+ if (!kcov)
+ return -ENOMEM;
+ atomic_set(&kcov->refcount, 1);
+ spin_lock_init(&kcov->lock);
+ filep->private_data = kcov;
+ return nonseekable_open(inode, filep);
+}
+
+static int kcov_close(struct inode *inode, struct file *filep)
+{
+ kcov_put(filep->private_data);
+ return 0;
+}
+
+static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
+ unsigned long arg)
+{
+ struct task_struct *t;
+ unsigned long size, unused;
+
+ switch (cmd) {
+ case KCOV_INIT_TRACE:
+ /*
+ * Enable kcov in trace mode and setup buffer size.
+ * Must happen before anything else.
+ */
+ if (kcov->mode != KCOV_MODE_DISABLED)
+ return -EBUSY;
+ /*
+ * Size must be at least 2 to hold current position and one PC.
+ * Later we allocate size * sizeof(unsigned long) memory,
+ * that must not overflow.
+ */
+ size = arg;
+ if (size < 2 || size > INT_MAX / sizeof(unsigned long))
+ return -EINVAL;
+ kcov->size = size;
+ kcov->mode = KCOV_MODE_TRACE;
+ return 0;
+ case KCOV_ENABLE:
+ /*
+ * Enable coverage for the current task.
+ * At this point user must have been enabled trace mode,
+ * and mmapped the file. Coverage collection is disabled only
+ * at task exit or voluntary by KCOV_DISABLE. After that it can
+ * be enabled for another task.
+ */
+ unused = arg;
+ if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED ||
+ kcov->area == NULL)
+ return -EINVAL;
+ if (kcov->t != NULL)
+ return -EBUSY;
+ t = current;
+ /* Cache in task struct for performance. */
+ t->kcov_size = kcov->size;
+ t->kcov_area = kcov->area;
+ /* See comment in __sanitizer_cov_trace_pc(). */
+ barrier();
+ WRITE_ONCE(t->kcov_mode, kcov->mode);
+ t->kcov = kcov;
+ kcov->t = t;
+ /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+ kcov_get(kcov);
+ return 0;
+ case KCOV_DISABLE:
+ /* Disable coverage for the current task. */
+ unused = arg;
+ if (unused != 0 || current->kcov != kcov)
+ return -EINVAL;
+ t = current;
+ if (WARN_ON(kcov->t != t))
+ return -EINVAL;
+ kcov_task_init(t);
+ kcov->t = NULL;
+ kcov_put(kcov);
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
+
+static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+ struct kcov *kcov;
+ int res;
+
+ kcov = filep->private_data;
+ spin_lock(&kcov->lock);
+ res = kcov_ioctl_locked(kcov, cmd, arg);
+ spin_unlock(&kcov->lock);
+ return res;
+}
+
+static const struct file_operations kcov_fops = {
+ .open = kcov_open,
+ .unlocked_ioctl = kcov_ioctl,
+ .mmap = kcov_mmap,
+ .release = kcov_close,
+};
+
+static int __init kcov_init(void)
+{
+ if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+ pr_err("failed to create kcov in debugfs\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+device_initcall(kcov_init);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8e96f6cc2a4a..31322a4275cd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,3 +1,6 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 584febd13e2e..a6d382312e6f 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -41,11 +41,13 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
* memremap() - remap an iomem_resource as cacheable memory
* @offset: iomem resource start address
* @size: size of remap
- * @flags: either MEMREMAP_WB or MEMREMAP_WT
+ * @flags: any of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC
*
* memremap() is "ioremap" for cases where it is known that the resource
* being mapped does not have i/o side effects and the __iomem
- * annotation is not applicable.
+ * annotation is not applicable. In the case of multiple flags, the different
+ * mapping types will be attempted in the order listed below until one of
+ * them succeeds.
*
* MEMREMAP_WB - matches the default mapping for System RAM on
* the architecture. This is usually a read-allocate write-back cache.
@@ -57,6 +59,10 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
* cache or are written through to memory and never exist in a
* cache-dirty state with respect to program visibility. Attempts to
* map System RAM with this mapping type will fail.
+ *
+ * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
+ * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
+ * uncached. Attempts to map System RAM with this mapping type will fail.
*/
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
@@ -64,6 +70,9 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
void *addr = NULL;
+ if (!flags)
+ return NULL;
+
if (is_ram == REGION_MIXED) {
WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
&offset, (unsigned long) size);
@@ -72,7 +81,6 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
/* Try all mapping types requested until one returns non-NULL */
if (flags & MEMREMAP_WB) {
- flags &= ~MEMREMAP_WB;
/*
* MEMREMAP_WB is special in that it can be satisifed
* from the direct map. Some archs depend on the
@@ -86,21 +94,22 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
}
/*
- * If we don't have a mapping yet and more request flags are
- * pending then we will be attempting to establish a new virtual
+ * If we don't have a mapping yet and other request flags are
+ * present then we will be attempting to establish a new virtual
* address mapping. Enforce that this mapping is not aliasing
* System RAM.
*/
- if (!addr && is_ram == REGION_INTERSECTS && flags) {
+ if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
&offset, (unsigned long) size);
return NULL;
}
- if (!addr && (flags & MEMREMAP_WT)) {
- flags &= ~MEMREMAP_WT;
+ if (!addr && (flags & MEMREMAP_WT))
addr = ioremap_wt(offset, size);
- }
+
+ if (!addr && (flags & MEMREMAP_WC))
+ addr = ioremap_wc(offset, size);
return addr;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index fa400852bf6c..535c96510a44 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -73,6 +73,26 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
+ */
+void nmi_panic(struct pt_regs *regs, const char *msg)
+{
+ int old_cpu, cpu;
+
+ cpu = raw_smp_processor_id();
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
+
+ if (old_cpu == PANIC_CPU_INVALID)
+ panic("%s", msg);
+ else if (old_cpu != cpu)
+ nmi_panic_self_stop(regs);
+}
+EXPORT_SYMBOL(nmi_panic);
+
/**
* panic - halt the system
* @fmt: The text string to print
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 230a77225e2e..5b70d64b871e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -473,8 +473,7 @@ static int enter_state(suspend_state_t state)
if (state == PM_SUSPEND_FREEZE) {
#ifdef CONFIG_PM_DEBUG
if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
- pr_warning("PM: Unsupported test mode for suspend to idle,"
- "please choose none/freezer/devices/platform.\n");
+ pr_warn("PM: Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
return -EAGAIN;
}
#endif
diff --git a/kernel/profile.c b/kernel/profile.c
index 51369697466e..c2199e9901c9 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -44,7 +44,7 @@ int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);
static cpumask_var_t prof_cpu_mask;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
static DEFINE_MUTEX(profile_flip_mutex);
@@ -202,7 +202,7 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
}
EXPORT_SYMBOL_GPL(profile_event_unregister);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
* Each cpu has a pair of open-addressed hashtables for pending
* profile hits. read_profile() IPI's all cpus to request them
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2341efe7fe02..d49bfa1e53e6 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -73,12 +73,11 @@ void __ptrace_unlink(struct task_struct *child)
{
BUG_ON(!child->ptrace);
- child->ptrace = 0;
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
spin_lock(&child->sighand->siglock);
-
+ child->ptrace = 0;
/*
* Clear all pending traps and TRAPPING. TRAPPING should be
* cleared regardless of JOBCTL_STOP_PENDING. Do it explicitly.
@@ -681,7 +680,7 @@ static int ptrace_peek_siginfo(struct task_struct *child,
break;
#ifdef CONFIG_COMPAT
- if (unlikely(is_compat_task())) {
+ if (unlikely(in_compat_syscall())) {
compat_siginfo_t __user *uinfo = compat_ptr(data);
if (copy_siginfo_to_user32(uinfo, &info) ||
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 61a16569ffbf..032b2c015beb 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,3 +1,7 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
+
obj-y += update.o sync.o
obj-$(CONFIG_SRCU) += srcu.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 302d6ebd64f7..414d9c16da42 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -2,6 +2,10 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
endif
+# These files are disabled because they produce non-interesting flaky coverage
+# that is not a function of syscall inputs. E.g. involuntary context switches.
+KCOV_INSTRUMENT := n
+
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44db0fffa8be..d8465eeab8b3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5371,6 +5371,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_UP_PREPARE:
rq->calc_load_update = calc_load_update;
+ account_reset_rq(rq);
break;
case CPU_ONLINE:
@@ -7537,7 +7538,7 @@ void set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
{
free_fair_sched_group(tg);
free_rt_sched_group(tg);
@@ -7563,7 +7564,7 @@ struct task_group *sched_create_group(struct task_group *parent)
return tg;
err:
- free_sched_group(tg);
+ sched_free_group(tg);
return ERR_PTR(-ENOMEM);
}
@@ -7583,17 +7584,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
}
/* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
{
/* now it should be safe to free those cfs_rqs */
- free_sched_group(container_of(rhp, struct task_group, rcu));
+ sched_free_group(container_of(rhp, struct task_group, rcu));
}
-/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg)
{
/* wait for possible concurrent references to cfs_rqs complete */
- call_rcu(&tg->rcu, free_sched_group_rcu);
+ call_rcu(&tg->rcu, sched_free_group_rcu);
}
void sched_offline_group(struct task_group *tg)
@@ -8052,31 +8052,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
+ sched_online_group(tg, parent);
+
return &tg->css;
}
-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
{
struct task_group *tg = css_tg(css);
- struct task_group *parent = css_tg(css->parent);
- if (parent)
- sched_online_group(tg, parent);
- return 0;
+ sched_offline_group(tg);
}
static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct task_group *tg = css_tg(css);
- sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
- struct task_group *tg = css_tg(css);
-
- sched_offline_group(tg);
+ /*
+ * Relies on the RCU grace period between css_released() and this.
+ */
+ sched_free_group(tg);
}
static void cpu_cgroup_fork(struct task_struct *task)
@@ -8436,9 +8431,8 @@ static struct cftype cpu_files[] = {
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
+ .css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free,
- .css_online = cpu_cgroup_css_online,
- .css_offline = cpu_cgroup_css_offline,
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 2ddaebf7469a..4a811203c04a 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
}
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 reset)
+ u64 val)
{
struct cpuacct *ca = css_ca(css);
int err = 0;
int i;
- if (reset) {
+ /*
+ * Only allow '0' here to do a reset.
+ */
+ if (val) {
err = -EINVAL;
goto out;
}
@@ -235,23 +238,10 @@ static struct cftype files[] = {
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
- int cpu;
-
- cpu = task_cpu(tsk);
rcu_read_lock();
-
- ca = task_ca(tsk);
-
- while (true) {
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
- *cpuusage += cputime;
-
- ca = parent_ca(ca);
- if (!ca)
- break;
- }
-
+ for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
+ *this_cpu_ptr(ca->cpuusage) += cputime;
rcu_read_unlock();
}
@@ -260,18 +250,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
*
* Note: it's the caller that updates the account of the root cgroup.
*/
-void cpuacct_account_field(struct task_struct *p, int index, u64 val)
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
- struct kernel_cpustat *kcpustat;
struct cpuacct *ca;
rcu_read_lock();
- ca = task_ca(p);
- while (ca != &root_cpuacct) {
- kcpustat = this_cpu_ptr(ca->cpustat);
- kcpustat->cpustat[index] += val;
- ca = parent_ca(ca);
- }
+ for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
+ this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
rcu_read_unlock();
}
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ed605624a5e7..ba72807c73d4 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -1,7 +1,7 @@
#ifdef CONFIG_CGROUP_CPUACCT
extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
#else
@@ -10,7 +10,7 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
}
static inline void
-cpuacct_account_field(struct task_struct *p, int index, u64 val)
+cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 46d64e4ccfde..0fe30e66aff1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3181,17 +3181,25 @@ static inline void check_schedstat_required(void)
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+ bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
+ bool curr = cfs_rq->curr == se;
+
/*
- * Update the normalized vruntime before updating min_vruntime
- * through calling update_curr().
+ * If we're the current task, we must renormalise before calling
+ * update_curr().
*/
- if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
+ if (renorm && curr)
se->vruntime += cfs_rq->min_vruntime;
+ update_curr(cfs_rq);
+
/*
- * Update run-time statistics of the 'current'.
+ * Otherwise, renormalise after, such that we're placed at the current
+ * moment in time, instead of some random moment in the past.
*/
- update_curr(cfs_rq);
+ if (renorm && !curr)
+ se->vruntime += cfs_rq->min_vruntime;
+
enqueue_entity_load_avg(cfs_rq, se);
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
@@ -3207,7 +3215,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue(cfs_rq, se);
check_spread(cfs_rq, se);
}
- if (se != cfs_rq->curr)
+ if (!curr)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
@@ -5071,7 +5079,19 @@ static int select_idle_sibling(struct task_struct *p, int target)
return i;
/*
- * Otherwise, iterate the domains and find an elegible idle cpu.
+ * Otherwise, iterate the domains and find an eligible idle cpu.
+ *
+ * A completely idle sched group at higher domains is more
+ * desirable than an idle group at a lower level, because lower
+ * domains have smaller groups and usually share hardware
+ * resources which causes tasks to contend on them, e.g. x86
+ * hyperthread siblings in the lowest domain (SMT) can contend
+ * on the shared cpu pipeline.
+ *
+ * However, while we prefer idle groups at higher domains
+ * finding an idle cpu at the lowest domain is still better than
+ * returning 'target', which we've already established, isn't
+ * idle.
*/
sd = rcu_dereference(per_cpu(sd_llc, target));
for_each_lower_domain(sd) {
@@ -5081,11 +5101,16 @@ static int select_idle_sibling(struct task_struct *p, int target)
tsk_cpus_allowed(p)))
goto next;
+ /* Ensure the entire group is idle */
for_each_cpu(i, sched_group_cpus(sg)) {
if (i == target || !idle_cpu(i))
goto next;
}
+ /*
+ * It doesn't matter which cpu we pick, the
+ * whole group is idle.
+ */
target = cpumask_first_and(sched_group_cpus(sg),
tsk_cpus_allowed(p));
goto done;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 382848a24ed9..ec2e8d23527e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1841,3 +1841,16 @@ static inline void cpufreq_trigger_update(u64 time)
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}
#endif /* CONFIG_CPU_FREQ */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ rq->prev_irq_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT
+ rq->prev_steal_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ rq->prev_steal_time_rq = 0;
+#endif
+}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 15a1795bbba1..e1e5a354854e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -395,7 +395,7 @@ seccomp_prepare_user_filter(const char __user *user_filter)
struct seccomp_filter *filter = ERR_PTR(-EFAULT);
#ifdef CONFIG_COMPAT
- if (is_compat_task()) {
+ if (in_compat_syscall()) {
struct compat_sock_fprog fprog32;
if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
goto out;
@@ -529,7 +529,7 @@ static void __secure_computing_strict(int this_syscall)
{
int *syscall_whitelist = mode1_syscalls;
#ifdef CONFIG_COMPAT
- if (is_compat_task())
+ if (in_compat_syscall())
syscall_whitelist = mode1_syscalls_32;
#endif
do {
diff --git a/kernel/signal.c b/kernel/signal.c
index fe8ed298373c..aa9bf00749c1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3585,6 +3585,10 @@ __weak const char *arch_vma_name(struct vm_area_struct *vma)
void __init signals_init(void)
{
+ /* If this check fails, the __ARCH_SI_PREAMBLE_SIZE value is wrong! */
+ BUILD_BUG_ON(__ARCH_SI_PREAMBLE_SIZE
+ != offsetof(struct siginfo, _sifields._pad));
+
sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
}
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 7e7746a42a62..10a1d7dc9313 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,7 +1321,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
}
mnt = task_active_pid_ns(current)->proc_mnt;
- file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
+ file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0);
result = PTR_ERR(file);
if (IS_ERR(file))
goto out_putname;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 195fe7d2caad..084b79f5917e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -378,7 +378,7 @@ static int __init tick_nohz_full_setup(char *str)
{
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
- pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+ pr_warn("NO_HZ: Incorrect nohz_full cpumask\n");
free_bootmem_cpumask_var(tick_nohz_full_mask);
return 1;
}
@@ -446,8 +446,7 @@ void __init tick_nohz_init(void)
* interrupts to avoid circular dependency on the tick
*/
if (!arch_irq_work_has_interrupt()) {
- pr_warning("NO_HZ: Can't run full dynticks because arch doesn't "
- "support irq work self-IPIs\n");
+ pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
cpumask_clear(tick_nohz_full_mask);
cpumask_copy(housekeeping_mask, cpu_possible_mask);
tick_nohz_full_running = false;
@@ -457,7 +456,8 @@ void __init tick_nohz_init(void)
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
- pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+ pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
+ cpu);
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2aeb6ffc0a1e..f94e7a21f52d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1437,12 +1437,12 @@ static struct trace_event trace_blk_event = {
static int __init init_blk_tracer(void)
{
if (!register_trace_event(&trace_blk_event)) {
- pr_warning("Warning: could not register block events\n");
+ pr_warn("Warning: could not register block events\n");
return 1;
}
if (register_tracer(&blk_tracer) != 0) {
- pr_warning("Warning: could not register the block tracer\n");
+ pr_warn("Warning: could not register the block tracer\n");
unregister_trace_event(&trace_blk_event);
return 1;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 57a6eea84694..b1870fbd2b67 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1030,8 +1030,7 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
for_each_possible_cpu(cpu) {
stat = &per_cpu(ftrace_profile_stats, cpu);
- /* allocate enough for function name + cpu number */
- name = kmalloc(32, GFP_KERNEL);
+ name = kasprintf(GFP_KERNEL, "function%d", cpu);
if (!name) {
/*
* The files created are permanent, if something happens
@@ -1043,7 +1042,6 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
return;
}
stat->stat = function_stats;
- snprintf(name, 32, "function%d", cpu);
stat->stat.name = name;
ret = register_stat_tracer(&stat->stat);
if (ret) {
@@ -1058,8 +1056,7 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
entry = tracefs_create_file("function_profile_enabled", 0644,
d_tracer, NULL, &ftrace_profile_fops);
if (!entry)
- pr_warning("Could not create tracefs "
- "'function_profile_enabled' entry\n");
+ pr_warn("Could not create tracefs 'function_profile_enabled' entry\n");
}
#else /* CONFIG_FUNCTION_PROFILER */
@@ -1610,7 +1607,7 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
return keep_regs;
}
-static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
+static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
{
@@ -1618,12 +1615,13 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
struct ftrace_hash *other_hash;
struct ftrace_page *pg;
struct dyn_ftrace *rec;
+ bool update = false;
int count = 0;
int all = 0;
/* Only update if the ops has been registered */
if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
- return;
+ return false;
/*
* In the filter_hash case:
@@ -1650,7 +1648,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
* then there's nothing to do.
*/
if (ftrace_hash_empty(hash))
- return;
+ return false;
}
do_for_each_ftrace_rec(pg, rec) {
@@ -1694,7 +1692,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (inc) {
rec->flags++;
if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX))
- return;
+ return false;
/*
* If there's only a single callback registered to a
@@ -1720,7 +1718,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
rec->flags |= FTRACE_FL_REGS;
} else {
if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0))
- return;
+ return false;
rec->flags--;
/*
@@ -1753,22 +1751,28 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
*/
}
count++;
+
+ /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
+ update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+
/* Shortcut, if we handled all records, we are done. */
if (!all && count == hash->count)
- return;
+ return update;
} while_for_each_ftrace_rec();
+
+ return update;
}
-static void ftrace_hash_rec_disable(struct ftrace_ops *ops,
+static bool ftrace_hash_rec_disable(struct ftrace_ops *ops,
int filter_hash)
{
- __ftrace_hash_rec_update(ops, filter_hash, 0);
+ return __ftrace_hash_rec_update(ops, filter_hash, 0);
}
-static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
+static bool ftrace_hash_rec_enable(struct ftrace_ops *ops,
int filter_hash)
{
- __ftrace_hash_rec_update(ops, filter_hash, 1);
+ return __ftrace_hash_rec_update(ops, filter_hash, 1);
}
static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops,
@@ -2314,8 +2318,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_curr(rec);
if (FTRACE_WARN_ON(!ops)) {
- pr_warning("Bad trampoline accounting at: %p (%pS)\n",
- (void *)rec->ip, (void *)rec->ip);
+ pr_warn("Bad trampoline accounting at: %p (%pS)\n",
+ (void *)rec->ip, (void *)rec->ip);
/* Ftrace is shutting down, return anything */
return (unsigned long)FTRACE_ADDR;
}
@@ -2644,7 +2648,6 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return ret;
ftrace_start_up++;
- command |= FTRACE_UPDATE_CALLS;
/*
* Note that ftrace probes uses this to start up
@@ -2665,7 +2668,8 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return ret;
}
- ftrace_hash_rec_enable(ops, 1);
+ if (ftrace_hash_rec_enable(ops, 1))
+ command |= FTRACE_UPDATE_CALLS;
ftrace_startup_enable(command);
@@ -2695,11 +2699,11 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
/* Disabling ipmodify never fails */
ftrace_hash_ipmodify_disable(ops);
- ftrace_hash_rec_disable(ops, 1);
- ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+ if (ftrace_hash_rec_disable(ops, 1))
+ command |= FTRACE_UPDATE_CALLS;
- command |= FTRACE_UPDATE_CALLS;
+ ops->flags &= ~FTRACE_OPS_FL_ENABLED;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9293402ee68..a2f0b9f33e9b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -74,11 +74,6 @@ static struct tracer_opt dummy_tracer_opt[] = {
{ }
};
-static struct tracer_flags dummy_tracer_flags = {
- .val = 0,
- .opts = dummy_tracer_opt
-};
-
static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
@@ -1258,12 +1253,22 @@ int __init register_tracer(struct tracer *type)
if (!type->set_flag)
type->set_flag = &dummy_set_flag;
- if (!type->flags)
- type->flags = &dummy_tracer_flags;
- else
+ if (!type->flags) {
+ /*allocate a dummy tracer_flags*/
+ type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
+ if (!type->flags) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ type->flags->val = 0;
+ type->flags->opts = dummy_tracer_opt;
+ } else
if (!type->flags->opts)
type->flags->opts = dummy_tracer_opt;
+ /* store the tracer for __set_tracer_option */
+ type->flags->trace = type;
+
ret = run_tracer_selftest(type);
if (ret < 0)
goto out;
@@ -1659,6 +1664,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
#else
TRACE_FLAG_IRQS_NOSUPPORT |
#endif
+ ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
@@ -2071,20 +2077,20 @@ void trace_printk_init_buffers(void)
/* trace_printk() is for debug use only. Don't use it in production. */
- pr_warning("\n");
- pr_warning("**********************************************************\n");
- pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
- pr_warning("** **\n");
- pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
- pr_warning("** **\n");
- pr_warning("** This means that this is a DEBUG kernel and it is **\n");
- pr_warning("** unsafe for production use. **\n");
- pr_warning("** **\n");
- pr_warning("** If you see this message and you are not debugging **\n");
- pr_warning("** the kernel, report this immediately to your vendor! **\n");
- pr_warning("** **\n");
- pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
- pr_warning("**********************************************************\n");
+ pr_warn("\n");
+ pr_warn("**********************************************************\n");
+ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warn("** **\n");
+ pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
+ pr_warn("** **\n");
+ pr_warn("** This means that this is a DEBUG kernel and it is **\n");
+ pr_warn("** unsafe for production use. **\n");
+ pr_warn("** **\n");
+ pr_warn("** If you see this message and you are not debugging **\n");
+ pr_warn("** the kernel, report this immediately to your vendor! **\n");
+ pr_warn("** **\n");
+ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warn("**********************************************************\n");
/* Expand the buffers to set size */
tracing_update_buffers();
@@ -3505,7 +3511,7 @@ static int __set_tracer_option(struct trace_array *tr,
struct tracer_flags *tracer_flags,
struct tracer_opt *opts, int neg)
{
- struct tracer *trace = tr->current_trace;
+ struct tracer *trace = tracer_flags->trace;
int ret;
ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
@@ -4101,7 +4107,7 @@ trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
*/
map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
if (!map_array) {
- pr_warning("Unable to allocate trace enum mapping\n");
+ pr_warn("Unable to allocate trace enum mapping\n");
return;
}
@@ -4949,7 +4955,10 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
spd.nr_pages = i;
- ret = splice_to_pipe(pipe, &spd);
+ if (i)
+ ret = splice_to_pipe(pipe, &spd);
+ else
+ ret = 0;
out:
splice_shrink_spd(&spd);
return ret;
@@ -6131,7 +6140,7 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
- pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
+ pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
return;
}
@@ -6318,7 +6327,7 @@ struct dentry *trace_create_file(const char *name,
ret = tracefs_create_file(name, mode, parent, data, fops);
if (!ret)
- pr_warning("Could not create tracefs '%s' entry\n", name);
+ pr_warn("Could not create tracefs '%s' entry\n", name);
return ret;
}
@@ -6337,7 +6346,7 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr)
tr->options = tracefs_create_dir("options", d_tracer);
if (!tr->options) {
- pr_warning("Could not create tracefs directory 'options'\n");
+ pr_warn("Could not create tracefs directory 'options'\n");
return NULL;
}
@@ -6391,11 +6400,8 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
return;
for (i = 0; i < tr->nr_topts; i++) {
- /*
- * Check if these flags have already been added.
- * Some tracers share flags.
- */
- if (tr->topts[i].tracer->flags == tracer->flags)
+ /* Make sure there's no duplicate flags. */
+ if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
return;
}
@@ -7248,8 +7254,8 @@ __init static int tracer_alloc_buffers(void)
if (trace_boot_clock) {
ret = tracing_set_clock(&global_trace, trace_boot_clock);
if (ret < 0)
- pr_warning("Trace clock %s not defined, going back to default\n",
- trace_boot_clock);
+ pr_warn("Trace clock %s not defined, going back to default\n",
+ trace_boot_clock);
}
/*
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8414fa40bf27..3fff4adfd431 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -125,6 +125,7 @@ enum trace_flag_type {
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
+ TRACE_FLAG_NMI = 0x40,
};
#define TRACE_BUF_SIZE 1024
@@ -345,6 +346,7 @@ struct tracer_opt {
struct tracer_flags {
u32 val;
struct tracer_opt *opts;
+ struct tracer *trace;
};
/* Makes more easy to define a tracer opt */
@@ -1111,6 +1113,18 @@ struct filter_pred {
unsigned short right;
};
+static inline bool is_string_field(struct ftrace_event_field *field)
+{
+ return field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_STATIC_STRING ||
+ field->filter_type == FILTER_PTR_STRING;
+}
+
+static inline bool is_function_field(struct ftrace_event_field *field)
+{
+ return field->filter_type == FILTER_TRACE_FN;
+}
+
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct trace_event_file *file,
@@ -1159,9 +1173,24 @@ struct event_trigger_data {
struct event_filter __rcu *filter;
char *filter_str;
void *private_data;
+ bool paused;
struct list_head list;
};
+extern void trigger_data_free(struct event_trigger_data *data);
+extern int event_trigger_init(struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+extern int trace_event_trigger_enable_disable(struct trace_event_file *file,
+ int trigger_enable);
+extern void update_cond_flag(struct trace_event_file *file);
+extern void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct trace_event_file *file);
+extern int set_trigger_filter(char *filter_str,
+ struct event_trigger_data *trigger_data,
+ struct trace_event_file *file);
+extern int register_event_command(struct event_command *cmd);
+
/**
* struct event_trigger_ops - callbacks for trace event triggers
*
@@ -1174,7 +1203,8 @@ struct event_trigger_data {
* @func: The trigger 'probe' function called when the triggering
* event occurs. The data passed into this callback is the data
* that was supplied to the event_command @reg() function that
- * registered the trigger (see struct event_command).
+ * registered the trigger (see struct event_command) along with
+ * the trace record, rec.
*
* @init: An optional initialization function called for the trigger
* when the trigger is registered (via the event_command reg()
@@ -1199,7 +1229,8 @@ struct event_trigger_data {
* (see trace_event_triggers.c).
*/
struct event_trigger_ops {
- void (*func)(struct event_trigger_data *data);
+ void (*func)(struct event_trigger_data *data,
+ void *rec);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
@@ -1243,27 +1274,10 @@ struct event_trigger_ops {
* values are defined by adding new values to the trigger_type
* enum in include/linux/trace_events.h.
*
- * @post_trigger: A flag that says whether or not this command needs
- * to have its action delayed until after the current event has
- * been closed. Some triggers need to avoid being invoked while
- * an event is currently in the process of being logged, since
- * the trigger may itself log data into the trace buffer. Thus
- * we make sure the current event is committed before invoking
- * those triggers. To do that, the trigger invocation is split
- * in two - the first part checks the filter using the current
- * trace record; if a command has the @post_trigger flag set, it
- * sets a bit for itself in the return value, otherwise it
- * directly invokes the trigger. Once all commands have been
- * either invoked or set their return flag, the current record is
- * either committed or discarded. At that point, if any commands
- * have deferred their triggers, those commands are finally
- * invoked following the close of the current event. In other
- * words, if the event_trigger_ops @func() probe implementation
- * itself logs to the trace buffer, this flag should be set,
- * otherwise it can be left unspecified.
+ * @flags: See the enum event_command_flags below.
*
- * All the methods below, except for @set_filter(), must be
- * implemented.
+ * All the methods below, except for @set_filter() and @unreg_all(),
+ * must be implemented.
*
* @func: The callback function responsible for parsing and
* registering the trigger written to the 'trigger' file by the
@@ -1288,6 +1302,10 @@ struct event_trigger_ops {
* This is usually implemented by the generic utility function
* @unregister_trigger() (see trace_event_triggers.c).
*
+ * @unreg_all: An optional function called to remove all the triggers
+ * from the list of triggers associated with the event. Called
+ * when a trigger file is opened in truncate mode.
+ *
* @set_filter: An optional function called to parse and set a filter
* for the trigger. If no @set_filter() method is set for the
* event command, filters set by the user for the command will be
@@ -1301,7 +1319,7 @@ struct event_command {
struct list_head list;
char *name;
enum event_trigger_type trigger_type;
- bool post_trigger;
+ int flags;
int (*func)(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob, char *cmd, char *params);
@@ -1313,12 +1331,56 @@ struct event_command {
struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
+ void (*unreg_all)(struct trace_event_file *file);
int (*set_filter)(char *filter_str,
struct event_trigger_data *data,
struct trace_event_file *file);
struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
};
+/**
+ * enum event_command_flags - flags for struct event_command
+ *
+ * @POST_TRIGGER: A flag that says whether or not this command needs
+ * to have its action delayed until after the current event has
+ * been closed. Some triggers need to avoid being invoked while
+ * an event is currently in the process of being logged, since
+ * the trigger may itself log data into the trace buffer. Thus
+ * we make sure the current event is committed before invoking
+ * those triggers. To do that, the trigger invocation is split
+ * in two - the first part checks the filter using the current
+ * trace record; if a command has the @post_trigger flag set, it
+ * sets a bit for itself in the return value, otherwise it
+ * directly invokes the trigger. Once all commands have been
+ * either invoked or set their return flag, the current record is
+ * either committed or discarded. At that point, if any commands
+ * have deferred their triggers, those commands are finally
+ * invoked following the close of the current event. In other
+ * words, if the event_trigger_ops @func() probe implementation
+ * itself logs to the trace buffer, this flag should be set,
+ * otherwise it can be left unspecified.
+ *
+ * @NEEDS_REC: A flag that says whether or not this command needs
+ * access to the trace record in order to perform its function,
+ * regardless of whether or not it has a filter associated with
+ * it (filters make a trigger require access to the trace record
+ * but are not always present).
+ */
+enum event_command_flags {
+ EVENT_CMD_FL_POST_TRIGGER = 1,
+ EVENT_CMD_FL_NEEDS_REC = 2,
+};
+
+static inline bool event_command_post_trigger(struct event_command *cmd_ops)
+{
+ return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER;
+}
+
+static inline bool event_command_needs_rec(struct event_command *cmd_ops)
+{
+ return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC;
+}
+
extern int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable);
extern int tracing_alloc_snapshot(void);
@@ -1365,8 +1427,13 @@ int perf_ftrace_event_register(struct trace_event_call *call,
#ifdef CONFIG_FTRACE_SYSCALLS
void init_ftrace_syscalls(void);
+const char *get_syscall_name(int syscall);
#else
static inline void init_ftrace_syscalls(void) { }
+static inline const char *get_syscall_name(int syscall)
+{
+ return NULL;
+}
#endif
#ifdef CONFIG_EVENT_TRACING
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 6816302542b2..b3f5051cd4e9 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -961,18 +961,6 @@ int filter_assign_type(const char *type)
return FILTER_OTHER;
}
-static bool is_function_field(struct ftrace_event_field *field)
-{
- return field->filter_type == FILTER_TRACE_FN;
-}
-
-static bool is_string_field(struct ftrace_event_field *field)
-{
- return field->filter_type == FILTER_DYN_STRING ||
- field->filter_type == FILTER_STATIC_STRING ||
- field->filter_type == FILTER_PTR_STRING;
-}
-
static bool is_legal_op(struct ftrace_event_field *field, int op)
{
if (is_string_field(field) &&
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index b38f617b6181..d67992f3bb0e 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -28,8 +28,7 @@
static LIST_HEAD(trigger_commands);
static DEFINE_MUTEX(trigger_cmd_mutex);
-static void
-trigger_data_free(struct event_trigger_data *data)
+void trigger_data_free(struct event_trigger_data *data)
{
if (data->cmd_ops->set_filter)
data->cmd_ops->set_filter(NULL, data, NULL);
@@ -73,18 +72,20 @@ event_triggers_call(struct trace_event_file *file, void *rec)
return tt;
list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (data->paused)
+ continue;
if (!rec) {
- data->ops->func(data);
+ data->ops->func(data, rec);
continue;
}
filter = rcu_dereference_sched(data->filter);
if (filter && !filter_match_preds(filter, rec))
continue;
- if (data->cmd_ops->post_trigger) {
+ if (event_command_post_trigger(data->cmd_ops)) {
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data);
+ data->ops->func(data, rec);
}
return tt;
}
@@ -94,6 +95,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
* event_triggers_post_call - Call 'post_triggers' for a trace event
* @file: The trace_event_file associated with the event
* @tt: enum event_trigger_type containing a set bit for each trigger to invoke
+ * @rec: The trace entry for the event
*
* For each trigger associated with an event, invoke the trigger
* function registered with the associated trigger command, if the
@@ -104,13 +106,16 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
*/
void
event_triggers_post_call(struct trace_event_file *file,
- enum event_trigger_type tt)
+ enum event_trigger_type tt,
+ void *rec)
{
struct event_trigger_data *data;
list_for_each_entry_rcu(data, &file->triggers, list) {
+ if (data->paused)
+ continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data);
+ data->ops->func(data, rec);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -188,6 +193,19 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
return -ENODEV;
}
+ if ((file->f_mode & FMODE_WRITE) &&
+ (file->f_flags & O_TRUNC)) {
+ struct trace_event_file *event_file;
+ struct event_command *p;
+
+ event_file = event_file_data(file);
+
+ list_for_each_entry(p, &trigger_commands, list) {
+ if (p->unreg_all)
+ p->unreg_all(event_file);
+ }
+ }
+
if (file->f_mode & FMODE_READ) {
ret = seq_open(file, &event_triggers_seq_ops);
if (!ret) {
@@ -306,7 +324,7 @@ const struct file_operations event_trigger_fops = {
* Currently we only register event commands from __init, so mark this
* __init too.
*/
-static __init int register_event_command(struct event_command *cmd)
+__init int register_event_command(struct event_command *cmd)
{
struct event_command *p;
int ret = 0;
@@ -395,9 +413,8 @@ event_trigger_print(const char *name, struct seq_file *m,
*
* Return: 0 on success, errno otherwise
*/
-static int
-event_trigger_init(struct event_trigger_ops *ops,
- struct event_trigger_data *data)
+int event_trigger_init(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
{
data->ref++;
return 0;
@@ -425,8 +442,8 @@ event_trigger_free(struct event_trigger_ops *ops,
trigger_data_free(data);
}
-static int trace_event_trigger_enable_disable(struct trace_event_file *file,
- int trigger_enable)
+int trace_event_trigger_enable_disable(struct trace_event_file *file,
+ int trigger_enable)
{
int ret = 0;
@@ -483,13 +500,14 @@ clear_event_triggers(struct trace_array *tr)
* its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be
* cleared.
*/
-static void update_cond_flag(struct trace_event_file *file)
+void update_cond_flag(struct trace_event_file *file)
{
struct event_trigger_data *data;
bool set_cond = false;
list_for_each_entry_rcu(data, &file->triggers, list) {
- if (data->filter || data->cmd_ops->post_trigger) {
+ if (data->filter || event_command_post_trigger(data->cmd_ops) ||
+ event_command_needs_rec(data->cmd_ops)) {
set_cond = true;
break;
}
@@ -560,9 +578,9 @@ out:
* Usually used directly as the @unreg method in event command
* implementations.
*/
-static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *test,
- struct trace_event_file *file)
+void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct trace_event_file *file)
{
struct event_trigger_data *data;
bool unregistered = false;
@@ -696,9 +714,9 @@ event_trigger_callback(struct event_command *cmd_ops,
*
* Return: 0 on success, errno otherwise
*/
-static int set_trigger_filter(char *filter_str,
- struct event_trigger_data *trigger_data,
- struct trace_event_file *file)
+int set_trigger_filter(char *filter_str,
+ struct event_trigger_data *trigger_data,
+ struct trace_event_file *file)
{
struct event_trigger_data *data = trigger_data;
struct event_filter *filter = NULL, *tmp;
@@ -747,7 +765,7 @@ static int set_trigger_filter(char *filter_str,
}
static void
-traceon_trigger(struct event_trigger_data *data)
+traceon_trigger(struct event_trigger_data *data, void *rec)
{
if (tracing_is_on())
return;
@@ -756,7 +774,7 @@ traceon_trigger(struct event_trigger_data *data)
}
static void
-traceon_count_trigger(struct event_trigger_data *data)
+traceon_count_trigger(struct event_trigger_data *data, void *rec)
{
if (tracing_is_on())
return;
@@ -771,7 +789,7 @@ traceon_count_trigger(struct event_trigger_data *data)
}
static void
-traceoff_trigger(struct event_trigger_data *data)
+traceoff_trigger(struct event_trigger_data *data, void *rec)
{
if (!tracing_is_on())
return;
@@ -780,7 +798,7 @@ traceoff_trigger(struct event_trigger_data *data)
}
static void
-traceoff_count_trigger(struct event_trigger_data *data)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec)
{
if (!tracing_is_on())
return;
@@ -876,13 +894,13 @@ static struct event_command trigger_traceoff_cmd = {
#ifdef CONFIG_TRACER_SNAPSHOT
static void
-snapshot_trigger(struct event_trigger_data *data)
+snapshot_trigger(struct event_trigger_data *data, void *rec)
{
tracing_snapshot();
}
static void
-snapshot_count_trigger(struct event_trigger_data *data)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec)
{
if (!data->count)
return;
@@ -890,7 +908,7 @@ snapshot_count_trigger(struct event_trigger_data *data)
if (data->count != -1)
(data->count)--;
- snapshot_trigger(data);
+ snapshot_trigger(data, rec);
}
static int
@@ -969,13 +987,13 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
#define STACK_SKIP 3
static void
-stacktrace_trigger(struct event_trigger_data *data)
+stacktrace_trigger(struct event_trigger_data *data, void *rec)
{
trace_dump_stack(STACK_SKIP);
}
static void
-stacktrace_count_trigger(struct event_trigger_data *data)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
{
if (!data->count)
return;
@@ -983,7 +1001,7 @@ stacktrace_count_trigger(struct event_trigger_data *data)
if (data->count != -1)
(data->count)--;
- stacktrace_trigger(data);
+ stacktrace_trigger(data, rec);
}
static int
@@ -1017,7 +1035,7 @@ stacktrace_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
- .post_trigger = true,
+ .flags = EVENT_CMD_FL_POST_TRIGGER,
.func = event_trigger_callback,
.reg = register_trigger,
.unreg = unregister_trigger,
@@ -1054,7 +1072,7 @@ struct enable_trigger_data {
};
static void
-event_enable_trigger(struct event_trigger_data *data)
+event_enable_trigger(struct event_trigger_data *data, void *rec)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1065,7 +1083,7 @@ event_enable_trigger(struct event_trigger_data *data)
}
static void
-event_enable_count_trigger(struct event_trigger_data *data)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1079,7 +1097,7 @@ event_enable_count_trigger(struct event_trigger_data *data)
if (data->count != -1)
(data->count)--;
- event_enable_trigger(data);
+ event_enable_trigger(data, rec);
}
static int
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index fcd41a166405..5a095c2e4b69 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -219,6 +219,8 @@ static void tracing_stop_function_trace(struct trace_array *tr)
unregister_ftrace_function(tr->ops);
}
+static struct tracer function_trace;
+
static int
func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
@@ -228,6 +230,10 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
break;
+ /* We can change this flag when not running. */
+ if (tr->current_trace != &function_trace)
+ break;
+
unregister_ftrace_function(tr->ops);
if (set) {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a663cbb84107..91d6a63a2ea7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1350,7 +1350,7 @@ void graph_trace_open(struct trace_iterator *iter)
out_err_free:
kfree(data);
out_err:
- pr_warning("function graph tracer: not enough memory\n");
+ pr_warn("function graph tracer: not enough memory\n");
}
void graph_trace_close(struct trace_iterator *iter)
@@ -1468,12 +1468,12 @@ static __init int init_graph_trace(void)
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
if (!register_trace_event(&graph_trace_entry_event)) {
- pr_warning("Warning: could not register graph trace events\n");
+ pr_warn("Warning: could not register graph trace events\n");
return 1;
}
if (!register_trace_event(&graph_trace_ret_event)) {
- pr_warning("Warning: could not register graph trace events\n");
+ pr_warn("Warning: could not register graph trace events\n");
return 1;
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index e4e56589ec1d..03cdff84d026 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -109,8 +109,12 @@ static int func_prolog_dec(struct trace_array *tr,
return 0;
local_save_flags(*flags);
- /* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(*flags))
+ /*
+ * Slight chance to get a false positive on tracing_cpu,
+ * although I'm starting to think there isn't a chance.
+ * Leave this for now just to be paranoid.
+ */
+ if (!irqs_disabled_flags(*flags) && !preempt_count())
return 0;
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -622,7 +626,6 @@ static int __irqsoff_tracer_init(struct trace_array *tr)
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
- tracing_reset_online_cpus(&tr->trace_buffer);
ftrace_init_array_ops(tr, irqsoff_tracer_call);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 21b81a41dae5..919e0ddd8fcc 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -459,16 +459,14 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
if (ret == 0)
tk->tp.flags |= TP_FLAG_REGISTERED;
else {
- pr_warning("Could not insert probe at %s+%lu: %d\n",
- trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
+ pr_warn("Could not insert probe at %s+%lu: %d\n",
+ trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
- pr_warning("This probe might be able to register after"
- "target module is loaded. Continue.\n");
+ pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
ret = 0;
} else if (ret == -EILSEQ) {
- pr_warning("Probing address(0x%p) is not an "
- "instruction boundary.\n",
- tk->rp.kp.addr);
+ pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
+ tk->rp.kp.addr);
ret = -EINVAL;
}
}
@@ -529,7 +527,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
/* Register new event */
ret = register_kprobe_event(tk);
if (ret) {
- pr_warning("Failed to register probe event(%d)\n", ret);
+ pr_warn("Failed to register probe event(%d)\n", ret);
goto end;
}
@@ -564,10 +562,9 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
__unregister_trace_kprobe(tk);
ret = __register_trace_kprobe(tk);
if (ret)
- pr_warning("Failed to re-register probe %s on"
- "%s: %d\n",
- trace_event_name(&tk->tp.call),
- mod->name, ret);
+ pr_warn("Failed to re-register probe %s on %s: %d\n",
+ trace_event_name(&tk->tp.call),
+ mod->name, ret);
}
}
mutex_unlock(&probe_lock);
@@ -1336,16 +1333,14 @@ static __init int init_kprobe_trace(void)
/* Event list interface */
if (!entry)
- pr_warning("Could not create tracefs "
- "'kprobe_events' entry\n");
+ pr_warn("Could not create tracefs 'kprobe_events' entry\n");
/* Profile interface */
entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
NULL, &kprobe_profile_ops);
if (!entry)
- pr_warning("Could not create tracefs "
- "'kprobe_profile' entry\n");
+ pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
return 0;
}
fs_initcall(init_kprobe_trace);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 2be8c4f2403d..68f376ca6d3f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -146,7 +146,7 @@ static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
/* XXX: This is later than where events were lost. */
trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
if (!overrun_detected)
- pr_warning("mmiotrace has lost events.\n");
+ pr_warn("mmiotrace has lost events\n");
overrun_detected = true;
goto print_out;
}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 8bb2071474dd..49f61fe96a6b 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -56,7 +56,7 @@ static void nop_trace_reset(struct trace_array *tr)
}
/* It only serves as a signal handler and a callback to
- * accept or refuse tthe setting of a flag.
+ * accept or refuse the setting of a flag.
* If you don't implement it, then the flag setting will be
* automatically accepted.
*/
@@ -75,7 +75,7 @@ static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
if (bit == TRACE_NOP_OPT_REFUSE) {
printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
- "Now cat trace_options to see the result\n",
+ " Now cat trace_options to see the result\n",
set);
return -EINVAL;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 282982195e09..0bb9cf2d53e6 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -389,7 +389,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
char irqs_off;
int hardirq;
int softirq;
+ int nmi;
+ nmi = entry->flags & TRACE_FLAG_NMI;
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
@@ -415,10 +417,12 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
}
hardsoft_irq =
+ (nmi && hardirq) ? 'Z' :
+ nmi ? 'z' :
(hardirq && softirq) ? 'H' :
- hardirq ? 'h' :
- softirq ? 's' :
- '.';
+ hardirq ? 'h' :
+ softirq ? 's' :
+ '.' ;
trace_seq_printf(s, "%c%c%c",
irqs_off, need_resched, hardsoft_irq);
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 060df67dbdd1..f96f0383f6c6 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -296,6 +296,9 @@ static int t_show(struct seq_file *m, void *v)
const char *str = *fmt;
int i;
+ if (!*fmt)
+ return 0;
+
seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
/*
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1769a81da8a7..1d372fa6fefb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -636,8 +636,8 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
*tmp = '\0';
size = tmp - kbuf + 1;
} else if (done + size < count) {
- pr_warning("Line length is too long: "
- "Should be less than %d.", WRITE_BUFSIZE);
+ pr_warn("Line length is too long: Should be less than %d\n",
+ WRITE_BUFSIZE);
ret = -EINVAL;
goto out;
}
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 6cf935316769..413ff108fbd0 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -281,8 +281,7 @@ static int tracing_stat_init(void)
stat_dir = tracefs_create_dir("trace_stat", d_tracing);
if (!stat_dir)
- pr_warning("Could not create tracefs "
- "'trace_stat' entry\n");
+ pr_warn("Could not create tracefs 'trace_stat' entry\n");
return 0;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d1663083d903..e78f364cc192 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -106,6 +106,17 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
return syscalls_metadata[nr];
}
+const char *get_syscall_name(int syscall)
+{
+ struct syscall_metadata *entry;
+
+ entry = syscall_nr_to_meta(syscall);
+ if (!entry)
+ return NULL;
+
+ return entry->name;
+}
+
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_event *event)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d2f6d0be3503..7915142c89e4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -334,7 +334,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
ret = register_uprobe_event(tu);
if (ret) {
- pr_warning("Failed to register probe event(%d)\n", ret);
+ pr_warn("Failed to register probe event(%d)\n", ret);
goto end;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ecd536de603a..d0639d917899 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -491,7 +491,7 @@ static __init int init_tracepoints(void)
ret = register_module_notifier(&tracepoint_module_nb);
if (ret)
- pr_warning("Failed to register tracepoint module enter notifier\n");
+ pr_warn("Failed to register tracepoint module enter notifier\n");
return ret;
}