hw-breakpoints: Fix task-bound breakpoint slot allocation

Whatever the nature of a breakpoint's context (cpu-bound or
task-bound), we currently always perform the following constraint
checks before allocating it a slot:

- Check the number of pinned breakpoints bound to the concerned cpus
- Check the maximum number of task-bound breakpoints belonging to any
  single task.
- Add both and see if we have a remaining slot for the new breakpoint

This is the right thing to do when we are about to register a
cpu-bound breakpoint, but not when we are dealing with a task-bound
one. What we want in that case is:

- Check the number of pinned breakpoints bound to the concerned cpus
- Check the number of breakpoints that already belong to the task
  to which the new breakpoint is bound.
- Add both and check the sum against the available slots, as sketched
  below.
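
A minimal sketch of the fixed accounting for a single cpu, using the
names from kernel/hw_breakpoint.c (tsk is NULL when the breakpoint is
cpu-bound):

	unsigned int pinned = per_cpu(nr_cpu_bp_pinned, cpu);

	if (!tsk)
		/* cpu-bound: assume the worst pinned count among tasks */
		pinned += max_task_bp_pinned(cpu);
	else
		/* task-bound: count only this task's breakpoints */
		pinned += task_bp_pinned(tsk);

max_task_bp_pinned() keeps the worst case so that a cpu-bound
breakpoint can never be starved by whichever task pins the most
breakpoints, while task_bp_pinned() walks only the target task's perf
event context.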

This fixes a regression that makes the "firefox -g" command (firefox
running under gdb) fail to register breakpoints once we deal with a
secondary thread: the old check added the maximum per-task pinned
count found on the cpu instead of the count of the target task
itself, so breakpoints already bound to a first thread were charged
against every subsequent thread and spuriously exhausted the slots.

Reported-by: Walt <w41ter@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index b600fc2..02b4925 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -83,50 +83,17 @@
 	return 0;
 }
 
-/*
- * Report the number of pinned/un-pinned breakpoints we have in
- * a given cpu (cpu > -1) or in all of them (cpu = -1).
- */
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
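+/* Count the number of breakpoints attached to tsk's perf event context */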
+static int task_bp_pinned(struct task_struct *tsk)
 {
-	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
-		slots->pinned += max_task_bp_pinned(cpu);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
-
-		return;
-	}
-
-	for_each_online_cpu(cpu) {
-		unsigned int nr;
-
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
-		nr += max_task_bp_pinned(cpu);
-
-		if (nr > slots->pinned)
-			slots->pinned = nr;
-
-		nr = per_cpu(nr_bp_flexible, cpu);
-
-		if (nr > slots->flexible)
-			slots->flexible = nr;
-	}
-}
-
-/*
- * Add a pinned breakpoint for the given task in our constraint table
- */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
-{
-	int count = 0;
-	struct perf_event *bp;
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
-	unsigned int *tsk_pinned;
 	struct list_head *list;
+	struct perf_event *bp;
 	unsigned long flags;
+	int count = 0;
 
 	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return;
+		return 0;
 
 	list = &ctx->event_list;
 
@@ -143,8 +110,58 @@
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
+	return count;
+}
+
+/*
+ * Report the number of pinned/un-pinned breakpoints we have in
+ * @bp's target cpu (bp->cpu >= 0) or in all of them (bp->cpu == -1).
+ */
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+{
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
+	if (cpu >= 0) {
+		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		if (!tsk)
+			slots->pinned += max_task_bp_pinned(cpu);
+		else
+			slots->pinned += task_bp_pinned(tsk);
+		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+
 		return;
+	}
+
+	for_each_online_cpu(cpu) {
+		unsigned int nr;
+
+		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		if (!tsk)
+			nr += max_task_bp_pinned(cpu);
+		else
+			nr += task_bp_pinned(tsk);
+
+		if (nr > slots->pinned)
+			slots->pinned = nr;
+
+		nr = per_cpu(nr_bp_flexible, cpu);
+
+		if (nr > slots->flexible)
+			slots->flexible = nr;
+	}
+}
+
+/*
+ * Add/remove the given task's pinned breakpoints in our constraint table
+ */
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+{
+	unsigned int *tsk_pinned;
+	int count = 0;
+
+	count = task_bp_pinned(tsk);
 
 	tsk_pinned = per_cpu(task_bp_pinned, cpu);
 	if (enable) {
@@ -233,7 +250,7 @@
 
 	mutex_lock(&nr_bp_mutex);
 
-	fetch_bp_busy_slots(&slots, bp->cpu);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {