author      Steven Rostedt (Red Hat) <rostedt@goodmis.org>   2016-12-08 12:40:18 -0500
committer   Steven Rostedt <rostedt@goodmis.org>             2016-12-09 09:18:14 -0500
commit      656c7f0d2d2b3237a31b105d5ed217a65350104f
tree        19ec88d4f4a29c68f4c7ef650a929c82bb858fed /kernel
parent      847fa1a6d3d00f3bdf68ef5fa4a786f644a0dd67
tracing: Replace kmap with copy_from_user() in trace_marker writing
Instead of using get_user_pages_fast() and kmap_atomic() when writing
to the trace_marker file, just allocate enough space on the ring buffer
directly, and write into it via copy_from_user().
Writing into the trace_marker file used to allocate a temporary buffer
to perform the copy_from_user(), as we didn't want to write into the
ring buffer if the copy failed. But as a trace_marker write is supposed
to be extremely fast, and allocating memory causes other tracepoints to
trigger, Peter Zijlstra suggested using get_user_pages_fast() and
kmap_atomic() to keep the user space pages in memory and read them
directly. But Henrik Austad had issues with this approach because it
required taking the mm->mmap_sem, causing long delays with the write.
Instead, just allocate the space in the ring buffer and use
copy_from_user() directly. If it faults, return -EFAULT and write
"<faulted>" into the ring buffer.
Link: http://lkml.kernel.org/r/20161208124018.72dd0f86@gandalf.local.home
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Henrik Austad <henrik@austad.us>
Cc: Peter Zijlstra <peterz@infradead.org>
Updates: d696b58ca2c3ca "tracing: Do not allocate buffer for trace_marker"
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/trace/trace.c	139
1 file changed, 37 insertions(+), 102 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 60416bf7c591..6f420d7b703b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5738,61 +5738,6 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static inline int lock_user_pages(const char __user *ubuf, size_t cnt,
-				  struct page **pages, void **map_page,
-				  int *offset)
-{
-	unsigned long addr = (unsigned long)ubuf;
-	int nr_pages = 1;
-	int ret;
-	int i;
-
-	/*
-	 * Userspace is injecting traces into the kernel trace buffer.
-	 * We want to be as non intrusive as possible.
-	 * To do so, we do not want to allocate any special buffers
-	 * or take any locks, but instead write the userspace data
-	 * straight into the ring buffer.
-	 *
-	 * First we need to pin the userspace buffer into memory,
-	 * which, most likely it is, because it just referenced it.
-	 * But there's no guarantee that it is. By using get_user_pages_fast()
-	 * and kmap_atomic/kunmap_atomic() we can get access to the
-	 * pages directly. We then write the data directly into the
-	 * ring buffer.
-	 */
-
-	/* check if we cross pages */
-	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
-		nr_pages = 2;
-
-	*offset = addr & (PAGE_SIZE - 1);
-	addr &= PAGE_MASK;
-
-	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
-	if (ret < nr_pages) {
-		while (--ret >= 0)
-			put_page(pages[ret]);
-		return -EFAULT;
-	}
-
-	for (i = 0; i < nr_pages; i++)
-		map_page[i] = kmap_atomic(pages[i]);
-
-	return nr_pages;
-}
-
-static inline void unlock_user_pages(struct page **pages,
-				     void **map_page, int nr_pages)
-{
-	int i;
-
-	for (i = nr_pages - 1; i >= 0; i--) {
-		kunmap_atomic(map_page[i]);
-		put_page(pages[i]);
-	}
-}
-
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
 		   size_t cnt, loff_t *fpos)
@@ -5802,14 +5747,14 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	struct ring_buffer *buffer;
 	struct print_entry *entry;
 	unsigned long irq_flags;
-	struct page *pages[2];
-	void *map_page[2];
-	int nr_pages = 1;
+	const char faulted[] = "<faulted>";
 	ssize_t written;
-	int offset;
 	int size;
 	int len;
 
+/* Used in tracing_mark_raw_write() as well */
+#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
+
 	if (tracing_disabled)
 		return -EINVAL;
 
@@ -5821,30 +5766,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 
 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
 
-	nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
-	if (nr_pages < 0)
-		return nr_pages;
-
 	local_save_flags(irq_flags);
-	size = sizeof(*entry) + cnt + 2; /* possible \n added */
+	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
+
+	/* If less than "<faulted>", then make sure we can still add that */
+	if (cnt < FAULTED_SIZE)
+		size += FAULTED_SIZE - cnt;
+
 	buffer = tr->trace_buffer.buffer;
 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
 					    irq_flags, preempt_count());
-	if (!event) {
+	if (unlikely(!event))
 		/* Ring buffer disabled, return as if not open for write */
-		written = -EBADF;
-		goto out_unlock;
-	}
+		return -EBADF;
 
 	entry = ring_buffer_event_data(event);
 	entry->ip = _THIS_IP_;
 
-	if (nr_pages == 2) {
-		len = PAGE_SIZE - offset;
-		memcpy(&entry->buf, map_page[0] + offset, len);
-		memcpy(&entry->buf[len], map_page[1], cnt - len);
+	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
+	if (len) {
+		memcpy(&entry->buf, faulted, FAULTED_SIZE);
+		cnt = FAULTED_SIZE;
+		written = -EFAULT;
 	} else
-		memcpy(&entry->buf, map_page[0] + offset, cnt);
+		written = cnt;
+	len = cnt;
 
 	if (entry->buf[cnt - 1] != '\n') {
 		entry->buf[cnt] = '\n';
@@ -5854,12 +5800,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 
 	__buffer_unlock_commit(buffer, event);
 
-	written = cnt;
-
-	*fpos += written;
-
- out_unlock:
-	unlock_user_pages(pages, map_page, nr_pages);
+	if (written > 0)
+		*fpos += written;
 
 	return written;
 }
@@ -5875,15 +5817,14 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
 	struct raw_data_entry *entry;
+	const char faulted[] = "<faulted>";
 	unsigned long irq_flags;
-	struct page *pages[2];
-	void *map_page[2];
-	int nr_pages = 1;
 	ssize_t written;
-	int offset;
 	int size;
 	int len;
 
+#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+
 	if (tracing_disabled)
 		return -EINVAL;
 
@@ -5899,38 +5840,32 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
 
 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
 
-	nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
-	if (nr_pages < 0)
-		return nr_pages;
-
 	local_save_flags(irq_flags);
 	size = sizeof(*entry) + cnt;
+	if (cnt < FAULT_SIZE_ID)
+		size += FAULT_SIZE_ID - cnt;
+
 	buffer = tr->trace_buffer.buffer;
 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
 					    irq_flags, preempt_count());
-	if (!event) {
+	if (!event)
 		/* Ring buffer disabled, return as if not open for write */
-		written = -EBADF;
-		goto out_unlock;
-	}
+		return -EBADF;
 
 	entry = ring_buffer_event_data(event);
 
-	if (nr_pages == 2) {
-		len = PAGE_SIZE - offset;
-		memcpy(&entry->id, map_page[0] + offset, len);
-		memcpy(((char *)&entry->id) + len, map_page[1], cnt - len);
+	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
+	if (len) {
+		entry->id = -1;
+		memcpy(&entry->buf, faulted, FAULTED_SIZE);
+		written = -EFAULT;
 	} else
-		memcpy(&entry->id, map_page[0] + offset, cnt);
+		written = cnt;
 
 	__buffer_unlock_commit(buffer, event);
 
-	written = cnt;
-
-	*fpos += written;
-
- out_unlock:
-	unlock_user_pages(pages, map_page, nr_pages);
+	if (written > 0)
+		*fpos += written;
 
 	return written;
 }
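As a usage note, this fast path matters because trace_marker writes are
meant to be cheap enough to sprinkle through latency-sensitive code. A
minimal sketch of emitting a marker from user space follows; the tracefs
mount point is an assumption and may be /sys/kernel/tracing or
/sys/kernel/debug/tracing depending on the system.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello from user space";
	/* Path is an assumption; adjust to where tracefs is mounted. */
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	/* One write() becomes one TRACE_PRINT event; after this patch the
	 * payload is copied with copy_from_user() straight into space
	 * reserved in the ring buffer. */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write trace_marker");
	close(fd);
	return 0;
}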