perf_counter: Fix a race on perf_counter_ctx
While extending perfcounters with BTS hw-tracing, Markus
Metzger managed to trigger this warning:
[ 995.557128] WARNING: at kernel/perf_counter.c:1191 __perf_counter_task_sched_out+0x48/0x6b()
triggers because commit
9f498cc5be7e013d8d6e4c616980ed0ffc8680d2 (perf_counter: Full
task tracing) removed clearing of tsk->perf_counter_ctxp out
from under ctx->lock which introduced a race (against
perf_lock_task_context).
Move it back and deal with the exit notification by explicitly
passing along the former task context.
Reported-by: Markus T Metzger <markus.t.metzger@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1249667341.17467.5.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0023105..546e62d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2850,7 +2850,8 @@
*/
struct perf_task_event {
- struct task_struct *task;
+ struct task_struct *task;
+ struct perf_counter_context *task_ctx;
struct {
struct perf_event_header header;
@@ -2910,24 +2911,23 @@
static void perf_counter_task_event(struct perf_task_event *task_event)
{
struct perf_cpu_context *cpuctx;
- struct perf_counter_context *ctx;
+ struct perf_counter_context *ctx = task_event->task_ctx;
cpuctx = &get_cpu_var(perf_cpu_context);
perf_counter_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);
rcu_read_lock();
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
- ctx = rcu_dereference(current->perf_counter_ctxp);
+ if (!ctx)
+ ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
if (ctx)
perf_counter_task_ctx(ctx, task_event);
rcu_read_unlock();
}
-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+ struct perf_counter_context *task_ctx,
+ int new)
{
struct perf_task_event task_event;
@@ -2937,8 +2937,9 @@
return;
task_event = (struct perf_task_event){
- .task = task,
- .event = {
+ .task = task,
+ .task_ctx = task_ctx,
+ .event = {
.header = {
.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
.misc = 0,
@@ -2956,7 +2957,7 @@
void perf_counter_fork(struct task_struct *task)
{
- perf_counter_task(task, 1);
+ perf_counter_task(task, NULL, 1);
}
/*
@@ -4310,7 +4311,7 @@
unsigned long flags;
if (likely(!child->perf_counter_ctxp)) {
- perf_counter_task(child, 0);
+ perf_counter_task(child, NULL, 0);
return;
}
@@ -4330,6 +4331,7 @@
* incremented the context's refcount before we do put_ctx below.
*/
spin_lock(&child_ctx->lock);
+ child->perf_counter_ctxp = NULL;
/*
* If this context is a clone; unclone it so it can't get
* swapped to another process while we're removing all
@@ -4343,9 +4345,7 @@
* won't get any samples after PERF_EVENT_EXIT. We can however still
* get a few PERF_EVENT_READ events.
*/
- perf_counter_task(child, 0);
-
- child->perf_counter_ctxp = NULL;
+ perf_counter_task(child, child_ctx, 0);
/*
* We can recurse on the same lock type through: