ftrace: cleanups

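Factor out duplicated code and clean it up:

 - add the check_pages() and head_page() helpers and drop the ->trace
   pointer from struct trace_array_cpu; users now look up the first
   buffer page via head_page()
 - factor the buffer swap that was open-coded in both update_max_tr()
   and update_max_tr_single() out into flip_trace()
 - rework the page wrap-around in tracing_get_trace_entry() so that it
   skips the list head, and add sanity BUG_ON()s
 - assign 'data' before testing it in the per-CPU reset loop
 - use list_del_init() instead of list_del() when moving or freeing
   trace pages
 - irqsoff: take max_trace_lock with spin_lock_irqsave()
 - irqsoff/wakeup: shorten the latency printk()s and drop the
   dump_stack() calls
 - selftest: fix the inverted IS_ERR() check after kthread_run(), set
   ftrace_enabled while the test is tracing, and remove a debug printk
 - make the !CONFIG_DYNAMIC_FTRACE ftrace_force_update() stub evaluate
   to 0
 - whitespace and alignment cleanups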

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2c1670c..953a36d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -69,7 +69,7 @@
 extern void ftrace_call(void);
 extern void mcount_call(void);
 #else
-# define ftrace_force_update() do { } while (0)
+# define ftrace_force_update() ({ 0; })
 #endif
 
 static inline void tracer_disable(void)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4facf5c..6d4d2e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1152,10 +1152,10 @@
 
 core_initcall(ftrace_dynamic_init);
 #else
-# define ftrace_startup()	  do { } while (0)
-# define ftrace_shutdown()	  do { } while (0)
-# define ftrace_startup_sysctl()  do { } while (0)
-# define ftrace_shutdown_sysctl() do { } while (0)
+# define ftrace_startup()		do { } while (0)
+# define ftrace_shutdown()		do { } while (0)
+# define ftrace_startup_sysctl()	do { } while (0)
+# define ftrace_shutdown_sysctl()	do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f6d026f..61d2f02 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -142,12 +142,59 @@
 	tracing_record_cmdline(current);
 }
 
+void check_pages(struct trace_array_cpu *data)
+{
+	struct page *page, *tmp;
+
+	BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
+	BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
+
+	list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
+		BUG_ON(page->lru.next->prev != &page->lru);
+		BUG_ON(page->lru.prev->next != &page->lru);
+	}
+}
+
+void *head_page(struct trace_array_cpu *data)
+{
+	struct page *page;
+
+	check_pages(data);
+	if (list_empty(&data->trace_pages))
+		return NULL;
+
+	page = list_entry(data->trace_pages.next, struct page, lru);
+	BUG_ON(&page->lru == &data->trace_pages);
+
+	return page_address(page);
+}
+
+notrace static void
+flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
+{
+	struct list_head flip_pages;
+
+	INIT_LIST_HEAD(&flip_pages);
+
+	tr1->trace_current = NULL;
+	memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx,
+		sizeof(struct trace_array_cpu) -
+		offsetof(struct trace_array_cpu, trace_current_idx));
+
+	check_pages(tr1);
+	check_pages(tr2);
+	list_splice_init(&tr1->trace_pages, &flip_pages);
+	list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
+	list_splice_init(&flip_pages, &tr2->trace_pages);
+	BUG_ON(!list_empty(&flip_pages));
+	check_pages(tr1);
+	check_pages(tr2);
+}
+
 notrace void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data;
-	void *save_trace;
-	struct list_head save_pages;
 	int i;
 
 	WARN_ON_ONCE(!irqs_disabled());
@@ -155,11 +202,7 @@
 	/* clear out all the previous traces */
 	for_each_possible_cpu(i) {
 		data = tr->data[i];
-		save_trace = max_tr.data[i]->trace;
-		save_pages = max_tr.data[i]->trace_pages;
-		memcpy(max_tr.data[i], data, sizeof(*data));
-		data->trace = save_trace;
-		data->trace_pages = save_pages;
+		flip_trace(max_tr.data[i], data);
 		tracing_reset(data);
 	}
 
@@ -177,8 +220,6 @@
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data = tr->data[cpu];
-	void *save_trace;
-	struct list_head save_pages;
 	int i;
 
 	WARN_ON_ONCE(!irqs_disabled());
@@ -186,11 +227,8 @@
 	for_each_possible_cpu(i)
 		tracing_reset(max_tr.data[i]);
 
-	save_trace = max_tr.data[cpu]->trace;
-	save_pages = max_tr.data[cpu]->trace_pages;
-	memcpy(max_tr.data[cpu], data, sizeof(*data));
-	data->trace = save_trace;
-	data->trace_pages = save_pages;
+	flip_trace(max_tr.data[cpu], data);
+
 	tracing_reset(data);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -234,9 +272,9 @@
 		 * If we fail, we do not register this tracer.
 		 */
 		for_each_possible_cpu(i) {
-			if (!data->trace)
-				continue;
 			data = tr->data[i];
+			if (!head_page(data))
+				continue;
 			tracing_reset(data);
 		}
 		current_trace = type;
@@ -298,7 +336,7 @@
 void notrace tracing_reset(struct trace_array_cpu *data)
 {
 	data->trace_idx = 0;
-	data->trace_current = data->trace;
+	data->trace_current = head_page(data);
 	data->trace_current_idx = 0;
 }
 
@@ -425,26 +463,31 @@
 }
 
 static inline notrace struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr,
-			struct trace_array_cpu *data)
+tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 {
 	unsigned long idx, idx_next;
 	struct trace_entry *entry;
-	struct page *page;
 	struct list_head *next;
+	struct page *page;
 
 	data->trace_idx++;
 	idx = data->trace_current_idx;
 	idx_next = idx + 1;
 
+	BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
+
 	entry = data->trace_current + idx * TRACE_ENTRY_SIZE;
 
 	if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
 		page = virt_to_page(data->trace_current);
-		if (unlikely(&page->lru == data->trace_pages.prev))
-			next = data->trace_pages.next;
-		else
-			next = page->lru.next;
+		/*
+		 * Roundrobin - but skip the head (which is not a real page):
+		 */
+		next = page->lru.next;
+		if (unlikely(next == &data->trace_pages))
+			next = next->next;
+		BUG_ON(next == &data->trace_pages);
+
 		page = list_entry(next, struct page, lru);
 		data->trace_current = page_address(page);
 		idx_next = 0;
@@ -456,18 +499,17 @@
 }
 
 static inline notrace void
-tracing_generic_entry_update(struct trace_entry *entry,
-			     unsigned long flags)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
 	struct task_struct *tsk = current;
 	unsigned long pc;
 
 	pc = preempt_count();
 
-	entry->idx	= atomic_inc_return(&tracer_counter);
-	entry->preempt_count = pc & 0xff;
-	entry->pid	 = tsk->pid;
-	entry->t	 = now(raw_smp_processor_id());
+	entry->idx		= atomic_inc_return(&tracer_counter);
+	entry->preempt_count	= pc & 0xff;
+	entry->pid		= tsk->pid;
+	entry->t		= now(raw_smp_processor_id());
 	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
@@ -476,16 +518,15 @@
 
 notrace void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-		       unsigned long ip, unsigned long parent_ip,
-		       unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
 	struct trace_entry *entry;
 
-	entry = tracing_get_trace_entry(tr, data);
+	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type	    = TRACE_FN;
-	entry->fn.ip	    = ip;
-	entry->fn.parent_ip = parent_ip;
+	entry->type		= TRACE_FN;
+	entry->fn.ip		= ip;
+	entry->fn.parent_ip	= parent_ip;
 }
 
 notrace void
@@ -496,7 +537,7 @@
 {
 	struct trace_entry *entry;
 
-	entry = tracing_get_trace_entry(tr, data);
+	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_CTX;
 	entry->ctx.prev_pid	= prev->pid;
@@ -540,6 +581,8 @@
 	}
 
 	page = list_entry(iter->next_page[cpu], struct page, lru);
+	BUG_ON(&data->trace_pages == &page->lru);
+
 	array = page_address(page);
 
 	return &array[iter->next_page_idx[cpu]];
@@ -554,7 +597,7 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		if (!tr->data[cpu]->trace)
+		if (!head_page(tr->data[cpu]))
 			continue;
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
 		if (ent &&
@@ -762,7 +805,7 @@
 		name = type->name;
 
 	for_each_possible_cpu(cpu) {
-		if (tr->data[cpu]->trace) {
+		if (head_page(tr->data[cpu])) {
 			total += tr->data[cpu]->trace_idx;
 			if (tr->data[cpu]->trace_idx > tr->entries)
 				entries += tr->entries;
@@ -975,8 +1018,7 @@
 	for_each_possible_cpu(cpu) {
 		data = iter->tr->data[cpu];
 
-		if (data->trace &&
-		    data->trace_idx)
+		if (head_page(data) && data->trace_idx)
 			return 0;
 	}
 	return 1;
@@ -1576,9 +1618,9 @@
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
-	void *array;
 	struct page *page, *tmp;
 	LIST_HEAD(pages);
+	void *array;
 	int i;
 
 	/* first allocate a page for each CPU */
@@ -1610,14 +1652,14 @@
 	for_each_possible_cpu(i) {
 		data = global_trace.data[i];
 		page = list_entry(pages.next, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
 		ClearPageLRU(page);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 		data = max_tr.data[i];
 		page = list_entry(pages.next, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
 		SetPageLRU(page);
 #endif
@@ -1628,7 +1670,7 @@
 
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		__free_page(page);
 	}
 	return -ENOMEM;
@@ -1654,7 +1696,6 @@
 			       "for trace buffer!\n");
 			goto free_buffers;
 		}
-		data->trace = array;
 
 		/* set the array to the list */
 		INIT_LIST_HEAD(&data->trace_pages);
@@ -1671,7 +1712,6 @@
 			       "for trace buffer!\n");
 			goto free_buffers;
 		}
-		max_tr.data[i]->trace = array;
 
 		INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
 		page = virt_to_page(array);
@@ -1716,24 +1756,22 @@
 		struct page *page, *tmp;
 		struct trace_array_cpu *data = global_trace.data[i];
 
-		if (data && data->trace) {
+		if (data) {
 			list_for_each_entry_safe(page, tmp,
 						 &data->trace_pages, lru) {
-				list_del(&page->lru);
+				list_del_init(&page->lru);
 				__free_page(page);
 			}
-			data->trace = NULL;
 		}
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 		data = max_tr.data[i];
-		if (data && data->trace) {
+		if (data) {
 			list_for_each_entry_safe(page, tmp,
 						 &data->trace_pages, lru) {
-				list_del(&page->lru);
+				list_del_init(&page->lru);
 				__free_page(page);
 			}
-			data->trace = NULL;
 		}
 #endif
 	}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 88edbf1..cc1d34b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -53,12 +53,12 @@
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	void			*trace;
 	void			*trace_current;
-	unsigned		trace_current_idx;
 	struct list_head	trace_pages;
-	unsigned long		trace_idx;
 	atomic_t		disabled;
+	/* these fields get copied into max-trace: */
+	unsigned		trace_current_idx;
+	unsigned long		trace_idx;
 	unsigned long		saved_latency;
 	unsigned long		critical_start;
 	unsigned long		critical_end;
@@ -216,4 +216,6 @@
 #endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
+extern void *head_page(struct trace_array_cpu *data);
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 14183b8..2dfebb6 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -144,7 +144,7 @@
 	if (!report_latency(delta))
 		goto out;
 
-	spin_lock(&max_trace_lock);
+	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
 	if (!report_latency(delta))
@@ -165,32 +165,24 @@
 
 	update_max_tr_single(tr, current, cpu);
 
-	if (tracing_thresh)
-		printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section "
-		       "violates %lu us threshold.\n"
-		       " => started at timestamp %lu: ",
-				current->comm, current->pid,
-				raw_smp_processor_id(),
-				latency, nsecs_to_usecs(tracing_thresh), t0);
-	else
+	if (tracing_thresh) {
 		printk(KERN_INFO "(%16s-%-5d|#%d):"
-		       " new %lu us maximum-latency "
-		       "critical section.\n => started at timestamp %lu: ",
+			" %lu us critical section violates %lu us threshold.\n",
 				current->comm, current->pid,
 				raw_smp_processor_id(),
-				latency, t0);
-
-	print_symbol(KERN_CONT "<%s>\n", data->critical_start);
-	printk(KERN_CONT " =>   ended at timestamp %lu: ", t1);
-	print_symbol(KERN_CONT "<%s>\n", data->critical_end);
-	dump_stack();
-	t1 = nsecs_to_usecs(now(cpu));
-	printk(KERN_CONT " =>   dump-end timestamp %lu\n\n", t1);
+				latency, nsecs_to_usecs(tracing_thresh));
+	} else {
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+		       " new %lu us maximum-latency critical section.\n",
+				current->comm, current->pid,
+				raw_smp_processor_id(),
+				latency);
+	}
 
 	max_sequence++;
 
 out_unlock:
-	spin_unlock(&max_trace_lock);
+	spin_unlock_irqrestore(&max_trace_lock, flags);
 
 out:
 	data->critical_sequence = max_sequence;
@@ -216,7 +208,7 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!data->trace) ||
+	if (unlikely(!data) || unlikely(!head_page(data)) ||
 	    atomic_read(&data->disabled))
 		return;
 
@@ -256,7 +248,7 @@
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!data->trace) ||
+	if (unlikely(!data) || unlikely(!head_page(data)) ||
 	    !data->critical_start || atomic_read(&data->disabled))
 		return;
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3d10ff0..688df96 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -107,24 +107,18 @@
 	update_max_tr(tr, wakeup_task, wakeup_cpu);
 
 	if (tracing_thresh) {
-		printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency "
-		       "violates %lu us threshold.\n"
-		       " => started at timestamp %lu: ",
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+			" %lu us wakeup latency violates %lu us threshold.\n",
 				wakeup_task->comm, wakeup_task->pid,
 				raw_smp_processor_id(),
-				latency, nsecs_to_usecs(tracing_thresh), t0);
+				latency, nsecs_to_usecs(tracing_thresh));
 	} else {
-		printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum "
-		       "wakeup latency.\n => started at timestamp %lu: ",
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+			" new %lu us maximum wakeup latency.\n",
 				wakeup_task->comm, wakeup_task->pid,
-				cpu, latency, t0);
+				cpu, latency);
 	}
 
-	printk(KERN_CONT "   ended at timestamp %lu: ", t1);
-	dump_stack();
-	t1 = nsecs_to_usecs(now(cpu));
-	printk(KERN_CONT "   dump-end timestamp %lu\n\n", t1);
-
 out_unlock:
 	__wakeup_reset(tr);
 	spin_unlock_irqrestore(&wakeup_lock, flags);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index ef4d3cc..c01874c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1,6 +1,7 @@
 /* Include in trace.c */
 
 #include <linux/kthread.h>
+#include <linux/delay.h>
 
 static inline int trace_valid_entry(struct trace_entry *entry)
 {
@@ -15,28 +16,29 @@
 static int
 trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
 {
-	struct page *page;
 	struct trace_entry *entries;
+	struct page *page;
 	int idx = 0;
 	int i;
 
+	BUG_ON(list_empty(&data->trace_pages));
 	page = list_entry(data->trace_pages.next, struct page, lru);
 	entries = page_address(page);
 
-	if (data->trace != entries)
+	if (head_page(data) != entries)
 		goto failed;
 
 	/*
 	 * The starting trace buffer always has valid elements,
-	 * if any element exits.
+	 * if any element exists.
 	 */
-	entries = data->trace;
+	entries = head_page(data);
 
 	for (i = 0; i < tr->entries; i++) {
 
-		if (i < data->trace_idx &&
-		    !trace_valid_entry(&entries[idx])) {
-			printk(KERN_CONT ".. invalid entry %d ", entries[idx].type);
+		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+			printk(KERN_CONT ".. invalid entry %d ",
+				entries[idx].type);
 			goto failed;
 		}
 
@@ -80,11 +82,10 @@
 	int ret = 0;
 
 	for_each_possible_cpu(cpu) {
-		if (!tr->data[cpu]->trace)
+		if (!head_page(tr->data[cpu]))
 			continue;
 
 		cnt += tr->data[cpu]->trace_idx;
-		printk("%d: count = %ld\n", cpu, cnt);
 
 		ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
 		if (ret)
@@ -117,6 +118,8 @@
 	}
 
 	/* start the tracing */
+	ftrace_enabled = 1;
+
 	tr->ctrl = 1;
 	trace->init(tr);
 	/* Sleep for a 1/10 of a second */
@@ -124,6 +127,8 @@
 	/* stop the tracing. */
 	tr->ctrl = 0;
 	trace->ctrl_update(tr);
+	ftrace_enabled = 0;
+
 	/* check the trace buffer */
 	ret = trace_test_buffer(tr, &count);
 	trace->reset(tr);
@@ -328,7 +333,7 @@
 
 	/* create a high prio thread */
 	p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
-	if (!IS_ERR(p)) {
+	if (IS_ERR(p)) {
 		printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
 		return -1;
 	}