summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/ring_buffer.c158
-rw-r--r--kernel/trace/trace.c43
2 files changed, 125 insertions, 76 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3103a484182e..09df645ab9c7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -834,51 +834,24 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
ret = !pagebusy && full_hit(buffer, cpu, full);
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
+ if (!ret && (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)) {
+ cpu_buffer->shortest_full = full;
+ }
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
return ret;
}
-/**
- * ring_buffer_wait - wait for input to the ring buffer
- * @buffer: buffer to wait on
- * @cpu: the cpu buffer to wait on
- * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
- *
- * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
- * as data is added to any of the @buffer's cpu buffers. Otherwise
- * it will wait for data to be added to a specific cpu buffer.
- */
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+static inline bool
+rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+ int cpu, int full, ring_buffer_cond_fn cond, void *data)
{
- struct ring_buffer_per_cpu *cpu_buffer;
- DEFINE_WAIT(wait);
- struct rb_irq_work *work;
- int ret = 0;
-
- /*
- * Depending on what the caller is waiting for, either any
- * data in any cpu buffer, or a specific buffer, put the
- * caller on the appropriate wait queue.
- */
- if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
- /* Full only makes sense on per cpu reads */
- full = 0;
- } else {
- if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -ENODEV;
- cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
- }
+ if (rb_watermark_hit(buffer, cpu, full))
+ return true;
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+ if (cond(data))
+ return true;
/*
* The events can happen in critical sections where
@@ -901,27 +874,78 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* a task has been queued. It's OK for spurious wake ups.
*/
if (full)
- work->full_waiters_pending = true;
+ rbwork->full_waiters_pending = true;
else
- work->waiters_pending = true;
+ rbwork->waiters_pending = true;
- if (rb_watermark_hit(buffer, cpu, full))
- goto out;
+ return false;
+}
- if (signal_pending(current)) {
- ret = -EINTR;
- goto out;
+/*
+ * The default wait condition for ring_buffer_wait() is to just to exit the
+ * wait loop the first time it is woken up.
+ */
+static bool rb_wait_once(void *data)
+{
+ long *once = data;
+
+ /* wait_event() actually calls this twice before scheduling*/
+ if (*once > 1)
+ return true;
+
+ (*once)++;
+ return false;
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @cond: condition function to break out of wait (NULL to run once)
+ * @data: the data to pass to @cond.
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+ ring_buffer_cond_fn cond, void *data)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct wait_queue_head *waitq;
+ struct rb_irq_work *rbwork;
+ long once = 0;
+ int ret = 0;
+
+ if (!cond) {
+ cond = rb_wait_once;
+ data = &once;
+ }
+
+ /*
+ * Depending on what the caller is waiting for, either any
+ * data in any cpu buffer, or a specific buffer, put the
+ * caller on the appropriate wait queue.
+ */
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ rbwork = &buffer->irq_work;
+ /* Full only makes sense on per cpu reads */
+ full = 0;
+ } else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -ENODEV;
+ cpu_buffer = buffer->buffers[cpu];
+ rbwork = &cpu_buffer->irq_work;
}
- schedule();
- out:
if (full)
- finish_wait(&work->full_waiters, &wait);
+ waitq = &rbwork->full_waiters;
else
- finish_wait(&work->waiters, &wait);
+ waitq = &rbwork->waiters;
- if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
- ret = -EINTR;
+ ret = wait_event_interruptible((*waitq),
+ rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
return ret;
}
@@ -959,21 +983,30 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
}
if (full) {
- unsigned long flags;
-
poll_wait(filp, &rbwork->full_waiters, poll_table);
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ if (rb_watermark_hit(buffer, cpu, full))
+ return EPOLLIN | EPOLLRDNORM;
+ /*
+ * Only allow full_waiters_pending update to be seen after
+ * the shortest_full is set (in rb_watermark_hit). If the
+ * writer sees the full_waiters_pending flag set, it will
+ * compare the amount in the ring buffer to shortest_full.
+ * If the amount in the ring buffer is greater than the
+ * shortest_full percent, it will call the irq_work handler
+ * to wake up this list. The irq_handler will reset shortest_full
+ * back to zero. That's done under the reader_lock, but
+ * the below smp_mb() makes sure that the update to
+ * full_waiters_pending doesn't leak up into the above.
+ */
+ smp_mb();
rbwork->full_waiters_pending = true;
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- } else {
- poll_wait(filp, &rbwork->waiters, poll_table);
- rbwork->waiters_pending = true;
+ return 0;
}
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -989,9 +1022,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/
smp_mb();
- if (full)
- return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
-
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e2d3969cb762..ab4c1a1fbda8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1955,15 +1955,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
#endif /* CONFIG_TRACER_MAX_TRACE */
+struct pipe_wait {
+ struct trace_iterator *iter;
+ int wait_index;
+};
+
+static bool wait_pipe_cond(void *data)
+{
+ struct pipe_wait *pwait = data;
+ struct trace_iterator *iter = pwait->iter;
+
+ if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
+ return true;
+
+ return iter->closed;
+}
+
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
+ struct pipe_wait pwait;
int ret;
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
return 0;
- ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+ pwait.wait_index = atomic_read_acquire(&iter->wait_index);
+ pwait.iter = iter;
+
+ ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
+ wait_pipe_cond, &pwait);
#ifdef CONFIG_TRACER_MAX_TRACE
/*
@@ -8397,9 +8418,9 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id)
struct ftrace_buffer_info *info = file->private_data;
struct trace_iterator *iter = &info->iter;
- iter->wait_index++;
+ iter->closed = true;
/* Make sure the waiters see the new wait_index */
- smp_wmb();
+ (void)atomic_fetch_inc_release(&iter->wait_index);
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
@@ -8499,6 +8520,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
+ bool woken = false;
int page_size;
int entries, i;
ssize_t ret = 0;
@@ -8572,17 +8594,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */
if (!spd.nr_pages) {
- long wait_index;
if (ret)
goto out;
+ if (woken)
+ goto out;
+
ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out;
- wait_index = READ_ONCE(iter->wait_index);
-
ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
if (ret)
goto out;
@@ -8591,10 +8613,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if (!tracer_tracing_is_on(iter->tr))
goto out;
- /* Make sure we see the new wait_index */
- smp_rmb();
- if (wait_index != iter->wait_index)
- goto out;
+ /* Iterate one more time to collect any new data then exit */
+ woken = true;
goto again;
}
@@ -8617,9 +8637,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
mutex_lock(&trace_types_lock);
- iter->wait_index++;
/* Make sure the waiters see the new wait_index */
- smp_wmb();
+ (void)atomic_fetch_inc_release(&iter->wait_index);
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);