From c464c76eec4be587604ca082e8cded7e6b89f3bf Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 18 Mar 2014 16:56:41 +0800 Subject: perf: Allow building PMU drivers as modules This patch adds support for building PMU driver as module. It exports the functions perf_pmu_{register,unregister}() and adds reference tracking for the PMU driver module. When the PMU driver is built as a module, each active event of the PMU holds a reference to the driver module. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1395133004-23205-1-git-send-email-zheng.z.yan@intel.com Cc: eranian@google.com Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..5129b1201050 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "internal.h" @@ -3229,6 +3230,9 @@ static void __free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); + if (event->pmu) + module_put(event->pmu->module); + call_rcu(&event->rcu_head, free_event_rcu); } static void free_event(struct perf_event *event) @@ -6551,6 +6555,7 @@ free_pdc: free_percpu(pmu->pmu_disable_count); goto unlock; } +EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { @@ -6572,6 +6577,7 @@ void perf_pmu_unregister(struct pmu *pmu) put_device(pmu->dev); free_pmu_context(pmu); } +EXPORT_SYMBOL_GPL(perf_pmu_unregister); struct pmu *perf_init_event(struct perf_event *event) { @@ -6585,6 +6591,10 @@ struct pmu *perf_init_event(struct perf_event *event) pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); if (pmu) { + if (!try_module_get(pmu->module)) { + pmu = ERR_PTR(-ENODEV); + goto unlock; + } event->pmu = pmu; ret = pmu->event_init(event); if (ret) @@ -6593,6 +6603,10 @@ struct pmu *perf_init_event(struct perf_event *event) } list_for_each_entry_rcu(pmu, &pmus, entry) { + if (!try_module_get(pmu->module)) { + pmu = ERR_PTR(-ENODEV); + goto unlock; + } event->pmu = pmu; ret = pmu->event_init(event); if (!ret) @@ -6771,6 +6785,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, err_pmu: if (event->destroy) event->destroy(event); + module_put(pmu->module); err_ns: if (event->ns) put_pid_ns(event->ns); -- cgit v1.2.3 From 1f4ee5038f0c1ef95f8e6d47ad6623e006b5bce1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 May 2014 09:59:34 +0200 Subject: perf: Ensure consistent inherit state in groups Make sure all events in a group have the same inherit state. It was possible for group leaders to have inherit set while sibling events would not have inherit set. In this case we'd still inherit the siblings, leading to some non-fatal weirdness. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-r32tt8yldvic3jlcghd3g35u@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 09866a330af8..1de0d709f69f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7081,20 +7081,26 @@ SYSCALL_DEFINE5(perf_event_open, } } + if (task && group_leader && + group_leader->attr.inherit != attr.inherit) { + err = -EINVAL; + goto err_task; + } + get_online_cpus(); event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL); if (IS_ERR(event)) { err = PTR_ERR(event); - goto err_task; + goto err_cpus; } if (flags & PERF_FLAG_PID_CGROUP) { err = perf_cgroup_connect(pid, event, &attr, group_leader); if (err) { __free_event(event); - goto err_task; + goto err_cpus; } } @@ -7256,8 +7262,9 @@ err_context: put_ctx(ctx); err_alloc: free_event(event); -err_task: +err_cpus: put_online_cpus(); +err_task: if (task) put_task_struct(task); err_group_fd: -- cgit v1.2.3 From 15a2d4de0eab533a76bee9e68d7e1063dd25401c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 May 2014 11:41:02 +0200 Subject: perf: Always destroy groups on exit Commit 38b435b16c36 ("perf: Fix tear-down of inherited group events") states that we need to destroy groups for inherited events, but it doesn't make any sense to not also destroy groups for normal events. And while it usually makes no difference (the normal events won't leak, and its very likely all the group events will die in quick succession) it does make the code more consistent and closes a potential hole for trouble. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-426egt8zmsm12d2q8k2xz4tt@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 1de0d709f69f..819ffc006d67 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7400,7 +7400,7 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - perf_remove_from_context(child_event, !!child_event->parent); + perf_remove_from_context(child_event, true); /* * It can happen that the parent exits first, and has events -- cgit v1.2.3 From 63342411efd2d9350ad405205da036cd45ed1640 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 May 2014 11:49:16 +0200 Subject: perf: Validate locking assumption Document and validate the locking assumption of event_sched_in(). Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-sybq1publ9xt5no77cwvi0eo@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 819ffc006d67..0de199729f04 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1678,6 +1678,8 @@ event_sched_in(struct perf_event *event, u64 tstamp = perf_event_time(event); int ret = 0; + lockdep_assert_held(&ctx->lock); + if (event->state <= PERF_EVENT_STATE_OFF) return 0; -- cgit v1.2.3 From 683ede43dd412c6cad0d23578a018409ac9c683e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 May 2014 12:11:24 +0200 Subject: perf: Rework free paths Primarily make perf_event_release_kernel() into put_event(), this will allow kernel space to create per-task inherited events, and is safer in general. Also, document the free_event() assumptions. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rk9pvr6e1d0559lxstltbztc@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 66 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 0de199729f04..83505a35afba 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3251,7 +3251,8 @@ static void __free_event(struct perf_event *event) call_rcu(&event->rcu_head, free_event_rcu); } -static void free_event(struct perf_event *event) + +static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending); @@ -3279,42 +3280,31 @@ static void free_event(struct perf_event *event) if (is_cgroup_event(event)) perf_detach_cgroup(event); - __free_event(event); } -int perf_event_release_kernel(struct perf_event *event) +/* + * Used to free events which have a known refcount of 1, such as in error paths + * where the event isn't exposed yet and inherited events. + */ +static void free_event(struct perf_event *event) { - struct perf_event_context *ctx = event->ctx; - - WARN_ON_ONCE(ctx->parent_ctx); - /* - * There are two ways this annotation is useful: - * - * 1) there is a lock recursion from perf_event_exit_task - * see the comment there. - * - * 2) there is a lock-inversion with mmap_sem through - * perf_event_read_group(), which takes faults while - * holding ctx->mutex, however this is called after - * the last filedesc died, so there is no possibility - * to trigger the AB-BA case. - */ - mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); - perf_remove_from_context(event, true); - mutex_unlock(&ctx->mutex); - - free_event(event); + if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1, + "unexpected event refcount: %ld; ptr=%p\n", + atomic_long_read(&event->refcount), event)) { + /* leak to avoid use-after-free */ + return; + } - return 0; + _free_event(event); } -EXPORT_SYMBOL_GPL(perf_event_release_kernel); /* * Called when the last reference to the file is gone. */ static void put_event(struct perf_event *event) { + struct perf_event_context *ctx = event->ctx; struct task_struct *owner; if (!atomic_long_dec_and_test(&event->refcount)) @@ -3353,9 +3343,33 @@ static void put_event(struct perf_event *event) put_task_struct(owner); } - perf_event_release_kernel(event); + WARN_ON_ONCE(ctx->parent_ctx); + /* + * There are two ways this annotation is useful: + * + * 1) there is a lock recursion from perf_event_exit_task + * see the comment there. + * + * 2) there is a lock-inversion with mmap_sem through + * perf_event_read_group(), which takes faults while + * holding ctx->mutex, however this is called after + * the last filedesc died, so there is no possibility + * to trigger the AB-BA case. + */ + mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); + perf_remove_from_context(event, true); + mutex_unlock(&ctx->mutex); + + _free_event(event); } +int perf_event_release_kernel(struct perf_event *event) +{ + put_event(event); + return 0; +} +EXPORT_SYMBOL_GPL(perf_event_release_kernel); + static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); -- cgit v1.2.3 From 3a497f48637e2aac17eabb84a17f8ac5216028fc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 May 2014 12:42:29 +0200 Subject: perf: Simplify perf_event_exit_task_context() Instead of jumping through hoops to make sure to find (and exit) each event, do it the simple straight fwd way. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-tij931199thfkys8vbnokdpf@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 83505a35afba..7ab734fbaeeb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7485,24 +7485,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) */ mutex_lock(&child_ctx->mutex); -again: - list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, - group_entry) - __perf_event_exit_task(child_event, child_ctx, child); - - list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, - group_entry) + list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) __perf_event_exit_task(child_event, child_ctx, child); - /* - * If the last event was a group event, it will have appended all - * its siblings to the list, but we obtained 'tmp' before that which - * will still point to the list head terminating the iteration. - */ - if (!list_empty(&child_ctx->pinned_groups) || - !list_empty(&child_ctx->flexible_groups)) - goto again; - mutex_unlock(&child_ctx->mutex); put_ctx(child_ctx); -- cgit v1.2.3 From 12665b35b0b48c9583ee1b8f0a403dc708fb4a92 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 10 May 2014 13:10:59 +0200 Subject: perf/events/core: Drop unused variable after cleanup ... in 3a497f48637 ("perf: Simplify perf_event_exit_task_context()") Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1399720259-28275-1-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events/core.c') diff --git a/kernel/events/core.c b/kernel/events/core.c index 7ab734fbaeeb..ed50b0943213 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7431,7 +7431,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { - struct perf_event *child_event, *tmp; + struct perf_event *child_event; struct perf_event_context *child_ctx; unsigned long flags; -- cgit v1.2.3