Merge remote-tracking branch 'tip/perf/urgent' into perf/core

Merge reason: We are going to queue up a dependent patch:

"perf tools: Move parse event automated tests to separated object"

That depends on:

commit e7c72d8
perf tools: Add 'G' and 'H' modifiers to event parsing

Conflicts:
	tools/perf/builtin-stat.c

The conflict was with the recent 'perf_target' patches, in the code that
checks the result of the perf_evsel open routines to see whether a retry
is needed to cope with older kernels, where the exclude guest/host
perf_event_attr bits were not used.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/Makefile b/Makefile
index a06ee9f..606048a 100644
--- a/Makefile
+++ b/Makefile
@@ -1468,6 +1468,13 @@
 kernelversion:
 	@echo $(KERNELVERSION)
 
+# Clear a bunch of variables before executing the submake
+tools/: FORCE
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/
+
+tools/%: FORCE
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/ $*
+
 # Single targets
 # ---------------------------------------------------------------------------
 # Single targets are compatible with:
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 0dae252..d821b17 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,6 @@
 
 	idx = la_ptr;
 
-	perf_sample_data_init(&data, 0);
 	for (j = 0; j < cpuc->n_events; j++) {
 		if (cpuc->current_idx[j] == idx)
 			break;
@@ -848,7 +847,7 @@
 
 	hwc = &event->hw;
 	alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, hwc->last_period);
 
 	if (alpha_perf_event_set_period(event, hwc, idx)) {
 		if (perf_event_overflow(event, &data, regs)) {
diff --git a/arch/arm/configs/bcmring_defconfig b/arch/arm/configs/bcmring_defconfig
index 795374d..9e6a8fe 100644
--- a/arch/arm/configs/bcmring_defconfig
+++ b/arch/arm/configs/bcmring_defconfig
@@ -11,7 +11,7 @@
 # CONFIG_TIMERFD is not set
 # CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index b78af0c..ab627a7 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -489,8 +489,6 @@
 	 */
 	armv6_pmcr_write(pmcr);
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
@@ -509,7 +507,7 @@
 
 		hwc = &event->hw;
 		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 00755d8..d3c5360 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1077,8 +1077,6 @@
 	 */
 	regs = get_irq_regs();
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
@@ -1097,7 +1095,7 @@
 
 		hwc = &event->hw;
 		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 71a21e6..e34e725 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -248,8 +248,6 @@
 
 	regs = get_irq_regs();
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
@@ -263,7 +261,7 @@
 
 		hwc = &event->hw;
 		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
@@ -588,8 +586,6 @@
 
 	regs = get_irq_regs();
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
@@ -603,7 +599,7 @@
 
 		hwc = &event->hw;
 		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 811084f..ab73fa2 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1325,7 +1325,7 @@
 
 	regs = get_irq_regs();
 
-	perf_sample_data_init(&data, 0);
+	perf_sample_data_init(&data, 0, 0);
 
 	switch (counters) {
 #define HANDLE_COUNTER(n)						\
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index f104ccd..b1f9597 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -32,7 +32,7 @@
 CONFIG_INITRAMFS_COMPRESSION_GZIP=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
 CONFIG_KPROBES=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index e74d3a4..9ef2cc1 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -8,7 +8,7 @@
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_ELF_CORE is not set
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 175295f..1e2b7d0 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -9,7 +9,7 @@
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_ELF_CORE is not set
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 02aee03..8f84bcb 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1299,8 +1299,7 @@
 	if (record) {
 		struct perf_sample_data data;
 
-		perf_sample_data_init(&data, ~0ULL);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
 
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 0a6d2a9..106c533 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -613,8 +613,7 @@
 	if (record) {
 		struct perf_sample_data data;
 
-		perf_sample_data_init(&data, 0);
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (perf_event_overflow(event, &data, regs))
 			fsl_emb_pmu_stop(event, 0);
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 7b9c696..9bdcf72 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -5,7 +5,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=16
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 3c1e858..9d8521b 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -5,7 +5,7 @@
 CONFIG_POSIX_MQUEUE=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_PERF_COUNTERS=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 28559ce..5713957 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1296,8 +1296,6 @@
 
 	regs = args->regs;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/* If the PMU has the TOE IRQ enable bits, we need to do a
@@ -1321,7 +1319,7 @@
 		if (val & (1ULL << 31))
 			continue;
 
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 		if (!sparc_perf_event_set_period(event, hwc, idx))
 			continue;
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d14cc6..1324139 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783..18d9005 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+extern int modifying_ftrace_code;
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
@@ -50,6 +51,8 @@
 	/* No extra data needed for x86 */
 };
 
+int ftrace_int3_handler(struct pt_regs *regs);
+
 #endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb8059..957ec87 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD		0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT	3
+#define MSR_AMD64_IBSFETCH_REG_MASK	((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
 #define MSR_AMD64_IBSOPCTL		0xc0011033
 #define MSR_AMD64_IBSOPRIP		0xc0011034
 #define MSR_AMD64_IBSOPDATA		0xc0011035
@@ -141,8 +143,11 @@
 #define MSR_AMD64_IBSOPDATA3		0xc0011037
 #define MSR_AMD64_IBSDCLINAD		0xc0011038
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT	7
+#define MSR_AMD64_IBSOP_REG_MASK	((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
 #define MSR_AMD64_IBSCTL		0xc001103a
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
+#define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
 
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
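
The new *_REG_COUNT/*_REG_MASK macros describe the contiguous blocks of
IBS sample MSRs, so a driver can walk them from a bitmask instead of
naming each register. A minimal sketch of that idiom, assuming kernel
context (the helper name is illustrative):

	#include <linux/bitops.h>
	#include <linux/types.h>
	#include <asm/msr.h>

	/* Illustration: read the whole IbsOp register block into a buffer. */
	static void read_ibs_op_regs(u64 *buf)
	{
		unsigned long mask = MSR_AMD64_IBSOP_REG_MASK;
		unsigned int offset;

		/* MSRC001_1033..MSRC001_1039 are consecutive; walk the mask */
		for_each_set_bit(offset, &mask, MSR_AMD64_IBSOP_REG_COUNT)
			rdmsrl(MSR_AMD64_IBSOPCTL + offset, buf[offset]);
	}
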
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895..588f52e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@
 #define IBS_CAPS_OPCNT			(1U<<4)
 #define IBS_CAPS_BRNTRGT		(1U<<5)
 #define IBS_CAPS_OPCNTEXT		(1U<<6)
+#define IBS_CAPS_RIPINVALIDCHK		(1U<<7)
 
 #define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\
 					 | IBS_CAPS_FETCHSAM	\
@@ -170,21 +171,28 @@
 #define IBSCTL_LVT_OFFSET_VALID		(1ULL<<8)
 #define IBSCTL_LVT_OFFSET_MASK		0x0F
 
-/* IbsFetchCtl bits/masks */
+/* ibs fetch bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
 #define IBS_FETCH_ENABLE	(1ULL<<48)
 #define IBS_FETCH_CNT		0xFFFF0000ULL
 #define IBS_FETCH_MAX_CNT	0x0000FFFFULL
 
-/* IbsOpCtl bits */
+/* ibs op bits/masks */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT		(0xFFFF0ULL<<32)
 #define IBS_OP_CNT_CTL		(1ULL<<19)
 #define IBS_OP_VAL		(1ULL<<18)
 #define IBS_OP_ENABLE		(1ULL<<17)
 #define IBS_OP_MAX_CNT		0x0000FFFFULL
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
+#define IBS_RIP_INVALID		(1ULL<<38)
 
+#ifdef CONFIG_X86_LOCAL_APIC
 extern u32 get_ibs_caps(void);
+#else
+static inline u32 get_ibs_caps(void) { return 0; }
+#endif
 
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
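
IBS_OP_VAL and the new IBS_OP_CUR_CNT carve two fields out of the same
IbsOpCtl value: the rolled-over flag and the live current count. A
minimal sketch of how a reader combines them (the real driver below
additionally gates the current-count read on an IBS capability bit):

	#include <linux/types.h>
	#include <asm/perf_event.h>

	/* Illustration: accumulated op count encoded in an IbsOpCtl value. */
	static u64 ibs_op_count(u64 opctl)
	{
		u64 count = 0;

		if (opctl & IBS_OP_VAL)		/* counter rolled over */
			count += (opctl & IBS_OP_MAX_CNT) << 4;

		/* current count lives in bits 51:32, low 4 bits read as 0 */
		count += (opctl & IBS_OP_CUR_CNT) >> 32;

		return count;
	}
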
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e034..e049d6d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@
 
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
-	/* mark not used */
-	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
 
 	return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1222,7 +1217,7 @@
 		 * event overflow
 		 */
 		handled++;
-		data.period	= event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1..6565226 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@
 
 static int amd_pmu_hw_config(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
+	int ret;
 
+	/* pass precise event sampling to ibs: */
+	if (event->attr.precise_ip && get_ibs_caps())
+		return -ENOENT;
+
+	ret = x86_pmu_hw_config(event);
 	if (ret)
 		return ret;
 
@@ -205,10 +210,8 @@
 	 * when we come here
 	 */
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (nb->owners[i] == event) {
-			cmpxchg(nb->owners+i, event, NULL);
+		if (cmpxchg(nb->owners + i, event, NULL) == event)
 			break;
-		}
 	}
 }
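
The rewritten loop relies on cmpxchg() alone to both test and clear
ownership of a north bridge counter slot, instead of a separate compare
followed by the exchange. The same pattern in isolation, as a sketch
assuming kernel context:

	#include <linux/atomic.h>
	#include <linux/perf_event.h>

	/* Illustration: atomically release a slot only if we still own it. */
	static void release_owned_slot(struct perf_event **owners, int nr,
				       struct perf_event *event)
	{
		int i;

		for (i = 0; i < nr; i++) {
			/* cmpxchg() tests and clears ownership in one step */
			if (cmpxchg(owners + i, event, NULL) == event)
				break;
		}
	}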
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d3..da9bcdc 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
 #include <linux/perf_event.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/ptrace.h>
 
 #include <asm/apic.h>
 
@@ -16,36 +17,591 @@
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
+
+enum ibs_states {
+	IBS_ENABLED	= 0,
+	IBS_STARTED	= 1,
+	IBS_STOPPING	= 2,
+
+	IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+	struct perf_event	*event;
+	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+	struct pmu	pmu;
+	unsigned int	msr;
+	u64		config_mask;
+	u64		cnt_mask;
+	u64		enable_mask;
+	u64		valid_mask;
+	u64		max_period;
+	unsigned long	offset_mask[1];
+	int		offset_max;
+	struct cpu_perf_ibs __percpu *pcpu;
+	u64		(*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+	u32		size;
+	union {
+		u32	data[0];	/* data buffer starts here */
+		u32	caps;
+	};
+	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+
+	/*
+	 * If we are way outside a reasonable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left < (s64)min)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	/*
+	 * If the hw period that triggers the sw overflow is too short
+	 * we might hit the irq handler. This biases the results.
+	 * Thus we shorten the next-to-last period and set the last
+	 * period to the max period.
+	 */
+	if (left > max) {
+		left -= max;
+		if (left > max)
+			left = max;
+		else if (left < min)
+			left = min;
+	}
+
+	*hw_period = (u64)left;
+
+	return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - width;
+	u64 prev_raw_count;
+	u64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		return 0;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+	if (perf_ibs_fetch.pmu.type == type)
+		return &perf_ibs_fetch;
+	if (perf_ibs_op.pmu.type == type)
+		return &perf_ibs_op;
+	return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
+ *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+	switch (event->attr.precise_ip) {
+	case 0:
+		return -ENOENT;
+	case 1:
+	case 2:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		switch (event->attr.config) {
+		case PERF_COUNT_HW_CPU_CYCLES:
+			*config = 0;
+			return 0;
+		}
+		break;
+	case PERF_TYPE_RAW:
+		switch (event->attr.config) {
+		case 0x0076:
+			*config = 0;
+			return 0;
+		case 0x00C1:
+			*config = IBS_OP_CNT_CTL;
+			return 0;
+		}
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return -EOPNOTSUPP;
+}
 
 static int perf_ibs_init(struct perf_event *event)
 {
-	if (perf_ibs.type != event->attr.type)
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs;
+	u64 max_cnt, config;
+	int ret;
+
+	perf_ibs = get_ibs_pmu(event->attr.type);
+	if (perf_ibs) {
+		config = event->attr.config;
+	} else {
+		perf_ibs = &perf_ibs_op;
+		ret = perf_ibs_precise_event(event, &config);
+		if (ret)
+			return ret;
+	}
+
+	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
+
+	if (config & ~perf_ibs->config_mask)
+		return -EINVAL;
+
+	if (hwc->sample_period) {
+		if (config & perf_ibs->cnt_mask)
+			/* raw max_cnt may not be set */
+			return -EINVAL;
+		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+			/*
+			 * The lower 4 bits can not be set in the ibs
+			 * max cnt, but we allow them in case the sample
+			 * period is later adjusted to set a frequency.
+			 */
+			return -EINVAL;
+		hwc->sample_period &= ~0x0FULL;
+		if (!hwc->sample_period)
+			hwc->sample_period = 0x10;
+	} else {
+		max_cnt = config & perf_ibs->cnt_mask;
+		config &= ~perf_ibs->cnt_mask;
+		event->attr.sample_period = max_cnt << 4;
+		hwc->sample_period = event->attr.sample_period;
+	}
+
+	if (!hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * If we modify hwc->sample_period, we also need to update
+	 * hwc->last_period and hwc->period_left.
+	 */
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+
+	hwc->config_base = perf_ibs->msr;
+	hwc->config = config;
+
 	return 0;
 }
 
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+			       struct hw_perf_event *hwc, u64 *period)
+{
+	int overflow;
+
+	/* ignore lower 4 bits in min count: */
+	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+	local64_set(&hwc->prev_count, 0);
+
+	return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+	return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+	u64 count = 0;
+
+	if (config & IBS_OP_VAL)
+		count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+	if (ibs_caps & IBS_CAPS_RDWROPCNT)
+		count += (config & IBS_OP_CUR_CNT) >> 32;
+
+	return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+		      u64 *config)
+{
+	u64 count = perf_ibs->get_count(*config);
+
+	/*
+	 * Set width to 64 since we do not overflow on max width but
+	 * instead on max count. In perf_ibs_set_period() we clear
+	 * prev count manually on overflow.
+	 */
+	while (!perf_event_try_update(event, count, 64)) {
+		rdmsrl(event->hw.config_base, *config);
+		count = perf_ibs->get_count(*config);
+	}
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+					 struct hw_perf_event *hwc, u64 config)
+{
+	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+					  struct hw_perf_event *hwc, u64 config)
+{
+	config &= ~perf_ibs->cnt_mask;
+	wrmsrl(hwc->config_base, config);
+	config &= ~perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
+ * in perf_ibs_start()/perf_ibs_stop() and instead always do both.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 period;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	perf_ibs_set_period(perf_ibs, hwc, &period);
+	set_bit(IBS_STARTED, pcpu->state);
+	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 config;
+	int stopping;
+
+	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+		return;
+
+	rdmsrl(hwc->config_base, config);
+
+	if (stopping) {
+		set_bit(IBS_STOPPING, pcpu->state);
+		perf_ibs_disable_event(perf_ibs, hwc, config);
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/*
+	 * Clear valid bit to not count rollovers on update, rollovers
+	 * are only updated in the irq handler.
+	 */
+	config &= ~perf_ibs->valid_mask;
+
+	perf_ibs_event_update(perf_ibs, event, &config);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
 static int perf_ibs_add(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+		return -ENOSPC;
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	pcpu->event = event;
+
+	if (flags & PERF_EF_START)
+		perf_ibs_start(event, PERF_EF_RELOAD);
+
 	return 0;
 }
 
 static void perf_ibs_del(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+		return;
+
+	perf_ibs_stop(event, PERF_EF_UPDATE);
+
+	pcpu->event = NULL;
+
+	perf_event_update_userpage(event);
 }
 
-static struct pmu perf_ibs = {
-	.event_init= perf_ibs_init,
-	.add= perf_ibs_add,
-	.del= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSFETCHCTL,
+	.config_mask		= IBS_FETCH_CONFIG_MASK,
+	.cnt_mask		= IBS_FETCH_MAX_CNT,
+	.enable_mask		= IBS_FETCH_ENABLE,
+	.valid_mask		= IBS_FETCH_VAL,
+	.max_period		= IBS_FETCH_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+
+	.get_count		= get_ibs_fetch_count,
 };
 
+static struct perf_ibs perf_ibs_op = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSOPCTL,
+	.config_mask		= IBS_OP_CONFIG_MASK,
+	.cnt_mask		= IBS_OP_MAX_CNT,
+	.enable_mask		= IBS_OP_ENABLE,
+	.valid_mask		= IBS_OP_VAL,
+	.max_period		= IBS_OP_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+
+	.get_count		= get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	struct perf_event *event = pcpu->event;
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	struct perf_ibs_data ibs_data;
+	int offset, size, check_rip, offset_max, throttle = 0;
+	unsigned int msr;
+	u64 *buf, *config, period;
+
+	if (!test_bit(IBS_STARTED, pcpu->state)) {
+		/*
+		 * Catch spurious interrupts after stopping IBS: After
+		 * disabling IBS there could still be incoming NMIs
+		 * with samples that even have the valid bit cleared.
+		 * Mark all these NMIs as handled.
+		 */
+		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+	}
+
+	msr = hwc->config_base;
+	buf = ibs_data.regs;
+	rdmsrl(msr, *buf);
+	if (!(*buf++ & perf_ibs->valid_mask))
+		return 0;
+
+	config = &ibs_data.regs[0];
+	perf_ibs_event_update(perf_ibs, event, config);
+	perf_sample_data_init(&data, 0, hwc->last_period);
+	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+		goto out;	/* no sw counter overflow */
+
+	ibs_data.caps = ibs_caps;
+	size = 1;
+	offset = 1;
+	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+	if (event->attr.sample_type & PERF_SAMPLE_RAW)
+		offset_max = perf_ibs->offset_max;
+	else if (check_rip)
+		offset_max = 2;
+	else
+		offset_max = 1;
+	do {
+		rdmsrl(msr + offset, *buf++);
+		size++;
+		offset = find_next_bit(perf_ibs->offset_mask,
+				       perf_ibs->offset_max,
+				       offset + 1);
+	} while (offset < offset_max);
+	ibs_data.size = sizeof(u64) * size;
+
+	regs = *iregs;
+	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+	} else {
+		instruction_pointer_set(&regs, ibs_data.regs[1]);
+		regs.flags |= PERF_EFLAGS_EXACT;
+	}
+
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.size = sizeof(u32) + ibs_data.size;
+		raw.data = ibs_data.data;
+		data.raw = &raw;
+	}
+
+	throttle = perf_event_overflow(event, &data, &regs);
+out:
+	if (throttle)
+		perf_ibs_disable_event(perf_ibs, hwc, *config);
+	else
+		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+
+	return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+	int handled = 0;
+
+	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+	struct cpu_perf_ibs __percpu *pcpu;
+	int ret;
+
+	pcpu = alloc_percpu(struct cpu_perf_ibs);
+	if (!pcpu)
+		return -ENOMEM;
+
+	perf_ibs->pcpu = pcpu;
+
+	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+	if (ret) {
+		perf_ibs->pcpu = NULL;
+		free_percpu(pcpu);
+	}
+
+	return ret;
+}
+
 static __init int perf_event_ibs_init(void)
 {
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	perf_pmu_register(&perf_ibs, "ibs", -1);
+	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
 	return 0;
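
Once registered, ibs_fetch and ibs_op appear as dynamic PMUs whose type
numbers are allocated by perf_pmu_register(). A user-space sketch of
opening one ibs_op sampling event, assuming the usual sysfs layout for
dynamic PMUs (the path and the error handling are illustrative, not
part of this commit):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	/* Illustration: open an ibs_op event on cpu 0 from user space. */
	static long ibs_op_open(void)
	{
		struct perf_event_attr attr;
		unsigned int type = 0;
		FILE *f;

		f = fopen("/sys/bus/event_source/devices/ibs_op/type", "r");
		if (!f)
			return -1;
		if (fscanf(f, "%u", &type) != 1)
			type = 0;
		fclose(f);
		if (!type)
			return -1;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = type;		/* dynamic PMU type */
		attr.sample_period = 100000;	/* low 4 bits get masked off */

		return syscall(__NR_perf_event_open, &attr, -1 /* pid */,
			       0 /* cpu */, -1 /* group fd */, 0 /* flags */);
	}
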
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2f..166546e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@
 	u64 status;
 	int handled;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1082,7 +1080,7 @@
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
 			data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df1..5a3edc2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@
 
 	ds->bts_index = ds->bts_buffer_base;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 	regs.ip     = 0;
 
 	/*
@@ -564,8 +563,7 @@
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacf..47124a7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@
 		handled += overflow;
 
 		/* event overflow for sure */
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
+
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f..4243e8b 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
 #include <trace/syscall.h>
 
 #include <asm/cacheflush.h>
+#include <asm/kprobes.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
-#include <asm/nmi.h>
-
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-/*
- * modifying_code is set to notify NMIs that they need to use
- * memory barriers when entering or exiting. But we don't want
- * to burden NMIs with unnecessary memory barriers when code
- * modification is not being done (which is most of the time).
- *
- * A mutex is already held when ftrace_arch_code_modify_prepare
- * and post_process are called. No locks need to be taken here.
- *
- * Stop machine will make sure currently running NMIs are done
- * and new NMIs will see the updated variable before we need
- * to worry about NMIs doing memory barriers.
- */
-static int modifying_code __read_mostly;
-static DEFINE_PER_CPU(int, save_modifying_code);
-
 int ftrace_arch_code_modify_prepare(void)
 {
 	set_kernel_text_rw();
 	set_all_modules_text_rw();
-	modifying_code = 1;
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	modifying_code = 0;
 	set_all_modules_text_ro();
 	set_kernel_text_ro();
 	return 0;
@@ -90,134 +71,6 @@
 	return calc.code;
 }
 
-/*
- * Modifying code must take extra care. On an SMP machine, if
- * the code being modified is also being executed on another CPU
- * that CPU will have undefined results and possibly take a GPF.
- * We use kstop_machine to stop other CPUS from exectuing code.
- * But this does not stop NMIs from happening. We still need
- * to protect against that. We separate out the modification of
- * the code to take care of this.
- *
- * Two buffers are added: An IP buffer and a "code" buffer.
- *
- * 1) Put the instruction pointer into the IP buffer
- *    and the new code into the "code" buffer.
- * 2) Wait for any running NMIs to finish and set a flag that says
- *    we are modifying code, it is done in an atomic operation.
- * 3) Write the code
- * 4) clear the flag.
- * 5) Wait for any running NMIs to finish.
- *
- * If an NMI is executed, the first thing it does is to call
- * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
- *
- * The trick is, it does not matter if everyone is writing the same
- * content to the code location. Also, if a CPU is executing code
- * it is OK to write to that code location if the contents being written
- * are the same as what exists.
- */
-
-#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
-static atomic_t nmi_running = ATOMIC_INIT(0);
-static int mod_code_status;		/* holds return value of text write */
-static void *mod_code_ip;		/* holds the IP to write to */
-static const void *mod_code_newcode;	/* holds the text to write to the IP */
-
-static unsigned nmi_wait_count;
-static atomic_t nmi_update_count = ATOMIC_INIT(0);
-
-int ftrace_arch_read_dyn_info(char *buf, int size)
-{
-	int r;
-
-	r = snprintf(buf, size, "%u %u",
-		     nmi_wait_count,
-		     atomic_read(&nmi_update_count));
-	return r;
-}
-
-static void clear_mod_flag(void)
-{
-	int old = atomic_read(&nmi_running);
-
-	for (;;) {
-		int new = old & ~MOD_CODE_WRITE_FLAG;
-
-		if (old == new)
-			break;
-
-		old = atomic_cmpxchg(&nmi_running, old, new);
-	}
-}
-
-static void ftrace_mod_code(void)
-{
-	/*
-	 * Yes, more than one CPU process can be writing to mod_code_status.
-	 *    (and the code itself)
-	 * But if one were to fail, then they all should, and if one were
-	 * to succeed, then they all should.
-	 */
-	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
-					     MCOUNT_INSN_SIZE);
-
-	/* if we fail, then kill any new writers */
-	if (mod_code_status)
-		clear_mod_flag();
-}
-
-void ftrace_nmi_enter(void)
-{
-	__this_cpu_write(save_modifying_code, modifying_code);
-
-	if (!__this_cpu_read(save_modifying_code))
-		return;
-
-	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
-		smp_rmb();
-		ftrace_mod_code();
-		atomic_inc(&nmi_update_count);
-	}
-	/* Must have previous changes seen before executions */
-	smp_mb();
-}
-
-void ftrace_nmi_exit(void)
-{
-	if (!__this_cpu_read(save_modifying_code))
-		return;
-
-	/* Finish all executions before clearing nmi_running */
-	smp_mb();
-	atomic_dec(&nmi_running);
-}
-
-static void wait_for_nmi_and_set_mod_flag(void)
-{
-	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
-		return;
-
-	do {
-		cpu_relax();
-	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
-
-	nmi_wait_count++;
-}
-
-static void wait_for_nmi(void)
-{
-	if (!atomic_read(&nmi_running))
-		return;
-
-	do {
-		cpu_relax();
-	} while (atomic_read(&nmi_running));
-
-	nmi_wait_count++;
-}
-
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -238,26 +91,7 @@
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
 		ip = (unsigned long)__va(__pa(ip));
 
-	mod_code_ip = (void *)ip;
-	mod_code_newcode = new_code;
-
-	/* The buffers need to be visible before we let NMIs write them */
-	smp_mb();
-
-	wait_for_nmi_and_set_mod_flag();
-
-	/* Make sure all running NMIs have finished before we write the code */
-	smp_mb();
-
-	ftrace_mod_code();
-
-	/* Make sure the write happens before clearing the bit */
-	smp_mb();
-
-	clear_mod_flag();
-	wait_for_nmi();
-
-	return mod_code_status;
+	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
 }
 
 static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,347 @@
 	return ret;
 }
 
+int modifying_ftrace_code __read_mostly;
+
+/*
+ * A breakpoint was added to the code address we are about to
+ * modify, and this is the handler that will just skip over it.
+ * We are either changing a nop into a trace call, or a trace
+ * call to a nop. While the change is taking place, we treat
+ * it just like it was a nop.
+ */
+int ftrace_int3_handler(struct pt_regs *regs)
+{
+	if (WARN_ON_ONCE(!regs))
+		return 0;
+
+	if (!ftrace_location(regs->ip - 1))
+		return 0;
+
+	regs->ip += MCOUNT_INSN_SIZE - 1;
+
+	return 1;
+}
+
+static int ftrace_write(unsigned long ip, const char *val, int size)
+{
+	/*
+	 * On x86_64, kernel text mappings are mapped read-only with
+	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+	 * of the kernel text mapping to modify the kernel text.
+	 *
+	 * For 32bit kernels, these mappings are same and we can use
+	 * kernel identity mapping to modify code.
+	 */
+	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+		ip = (unsigned long)__va(__pa(ip));
+
+	return probe_kernel_write((void *)ip, val, size);
+}
+
+static int add_break(unsigned long ip, const char *old)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+	unsigned char brk = BREAKPOINT_INSTRUCTION;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+	if (ftrace_write(ip, &brk, 1))
+		return -EPERM;
+
+	return 0;
+}
+
+static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned const char *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, addr);
+
+	return add_break(rec->ip, old);
+}
+
+
+static int add_brk_on_nop(struct dyn_ftrace *rec)
+{
+	unsigned const char *old;
+
+	old = ftrace_nop_replace();
+
+	return add_break(rec->ip, old);
+}
+
+static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_test_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return add_brk_on_nop(rec);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return add_brk_on_call(rec, ftrace_addr);
+	}
+	return 0;
+}
+
+/*
+ * On error, we need to remove breakpoints. This needs to
+ * be done carefully. If the address does not currently have a
+ * breakpoint, we know we are done. Otherwise, we look at the
+ * remaining 4 bytes of the instruction. If they match a nop,
+ * we replace the breakpoint with the nop. Otherwise we replace
+ * it with the call instruction.
+ */
+static int remove_breakpoint(struct dyn_ftrace *rec)
+{
+	unsigned char ins[MCOUNT_INSN_SIZE];
+	unsigned char brk = BREAKPOINT_INSTRUCTION;
+	const unsigned char *nop;
+	unsigned long ftrace_addr;
+	unsigned long ip = rec->ip;
+
+	/* If we fail the read, just give up */
+	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* If this does not have a breakpoint, we are done */
+	if (ins[0] != brk)
+		return -1;
+
+	nop = ftrace_nop_replace();
+
+	/*
+	 * If the last 4 bytes of the instruction do not match
+	 * a nop, then we assume that this is a call to ftrace_addr.
+	 */
+	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
+		/*
+		 * For extra paranoia, we check if the breakpoint is on
+		 * a call that would actually jump to the ftrace_addr.
+		 * If not, don't touch the breakpoint; we might just
+		 * create a disaster.
+		 */
+		ftrace_addr = (unsigned long)FTRACE_ADDR;
+		nop = ftrace_call_replace(ip, ftrace_addr);
+
+		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
+			return -EINVAL;
+	}
+
+	return probe_kernel_write((void *)ip, &nop[0], 1);
+}
+
+static int add_update_code(unsigned long ip, unsigned const char *new)
+{
+	/* skip breakpoint */
+	ip++;
+	new++;
+	if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
+		return -EPERM;
+	return 0;
+}
+
+static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_call_replace(ip, addr);
+	return add_update_code(ip, new);
+}
+
+static int add_update_nop(struct dyn_ftrace *rec)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_nop_replace();
+	return add_update_code(ip, new);
+}
+
+static int add_update(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_test_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return add_update_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return add_update_nop(rec);
+	}
+
+	return 0;
+}
+
+static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_call_replace(ip, addr);
+
+	if (ftrace_write(ip, new, 1))
+		return -EPERM;
+
+	return 0;
+}
+
+static int finish_update_nop(struct dyn_ftrace *rec)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_nop_replace();
+
+	if (ftrace_write(ip, new, 1))
+		return -EPERM;
+	return 0;
+}
+
+static int finish_update(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_update_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return finish_update_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return finish_update_nop(rec);
+	}
+
+	return 0;
+}
+
+static void do_sync_core(void *data)
+{
+	sync_core();
+}
+
+static void run_sync(void)
+{
+	int enable_irqs = irqs_disabled();
+
+	/* We may be called with interrupts disabled (on bootup). */
+	if (enable_irqs)
+		local_irq_enable();
+	on_each_cpu(do_sync_core, NULL, 1);
+	if (enable_irqs)
+		local_irq_disable();
+}
+
+static void ftrace_replace_code(int enable)
+{
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+	const char *report = "adding breakpoints";
+	int count = 0;
+	int ret;
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = add_breakpoints(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+		count++;
+	}
+
+	run_sync();
+
+	report = "updating code";
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = add_update(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+	}
+
+	run_sync();
+
+	report = "removing breakpoints";
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = finish_update(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+	}
+
+	run_sync();
+
+	return;
+
+ remove_breakpoints:
+	ftrace_bug(ret, rec ? rec->ip : 0);
+	printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+		remove_breakpoint(rec);
+	}
+}
+
+void arch_ftrace_update_code(int command)
+{
+	modifying_ftrace_code++;
+
+	if (command & FTRACE_UPDATE_CALLS)
+		ftrace_replace_code(1);
+	else if (command & FTRACE_DISABLE_CALLS)
+		ftrace_replace_code(0);
+
+	if (command & FTRACE_UPDATE_TRACE_FUNC)
+		ftrace_update_ftrace_func(ftrace_trace_function);
+
+	if (command & FTRACE_START_FUNC_RET)
+		ftrace_enable_ftrace_graph_caller();
+	else if (command & FTRACE_STOP_FUNC_RET)
+		ftrace_disable_ftrace_graph_caller();
+
+	modifying_ftrace_code--;
+}
+
 int __init ftrace_dyn_arch_init(void *data)
 {
 	/* The return code is returned via data */
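
Taken together, the new path replaces the stop_machine()-based NMI
dance with three breakpoint phases separated by core-sync IPIs.
Stripped of the error unwinding, the control flow of
ftrace_replace_code() above reduces to this sketch (illustration only):

	static int replace_all(int enable)
	{
		struct ftrace_rec_iter *iter;
		int ret;

		for_ftrace_rec_iter(iter) {	/* 1) int3 on byte 0 */
			ret = add_breakpoints(ftrace_rec_iter_record(iter), enable);
			if (ret)
				return ret;
		}
		run_sync();			/* all CPUs now trap the site */

		for_ftrace_rec_iter(iter) {	/* 2) rewrite bytes 1..4 */
			ret = add_update(ftrace_rec_iter_record(iter), enable);
			if (ret)
				return ret;
		}
		run_sync();

		for_ftrace_rec_iter(iter) {	/* 3) final first byte */
			ret = finish_update(ftrace_rec_iter_record(iter), enable);
			if (ret)
				return ret;
		}
		run_sync();
		return 0;
	}
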
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf3..eb1539e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -84,7 +84,7 @@
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *a;
@@ -209,7 +209,7 @@
 
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static notrace __kprobes void
+static __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
 	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -236,7 +236,7 @@
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
@@ -263,7 +263,7 @@
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
 	int handled;
@@ -305,7 +305,7 @@
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f1..92d5756 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -303,8 +304,13 @@
 }
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* ftrace must be first, everything else may cause a recursive crash */
+	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+		return;
+#endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 72a6cab..d32cc5e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -286,6 +286,12 @@
 struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
 struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
 
+#define for_ftrace_rec_iter(iter)		\
+	for (iter = ftrace_rec_iter_start();	\
+	     iter;				\
+	     iter = ftrace_rec_iter_next(iter))
+
+
 int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
@@ -485,8 +491,12 @@
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
-  static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { }
-  static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { }
+/*
+ * Use defines instead of static inlines because some arches will generate
+ * code for the CALLER_ADDR when we really want these to be a real nop.
+ */
+# define trace_preempt_on(a0, a1) do { } while (0)
+# define trace_preempt_off(a0, a1) do { } while (0)
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 645231c..c0d3442 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -480,15 +480,16 @@
 
 #define trace_printk(fmt, args...)					\
 do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
+	static const char *trace_printk_fmt				\
+		__attribute__((section("__trace_printk_fmt"))) =	\
+		__builtin_constant_p(fmt) ? fmt : NULL;			\
 									\
+	__trace_printk_check_format(fmt, ##args);			\
+									\
+	if (__builtin_constant_p(fmt))					\
 		__trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args);	\
-	} else								\
-		__trace_printk(_THIS_IP_, fmt, ##args);		\
+	else								\
+		__trace_printk(_THIS_IP_, fmt, ##args);			\
 } while (0)
 
 extern __printf(2, 3)
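
The rework hoists the section-placed trace_printk_fmt out of the
constant-format branch, so every expansion contributes an entry (NULL
when the format is not a compile-time constant) and the branch merely
selects between the binary and the string path. A usage sketch with
illustrative names:

	#include <linux/kernel.h>

	/* Illustration: constant vs. non-constant format strings. */
	static void trace_example(const char *dynfmt, int val)
	{
		trace_printk("val=%d\n", val);	/* constant: __trace_bprintk() */
		trace_printk(dynfmt, val);	/* non-constant: __trace_printk() */
	}
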
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ddbb6a9..8adf70e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1084,10 +1084,8 @@
 
 extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
-extern void __perf_event_task_sched_in(struct task_struct *prev,
-				       struct task_struct *task);
-extern void __perf_event_task_sched_out(struct task_struct *prev,
-					struct task_struct *next);
+extern void __perf_event_task_sched(struct task_struct *prev,
+				    struct task_struct *next);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1132,11 +1130,14 @@
 	struct perf_branch_stack	*br_stack;
 };
 
-static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
+static inline void perf_sample_data_init(struct perf_sample_data *data,
+					 u64 addr, u64 period)
 {
+	/* remaining struct members initialized in perf_prepare_sample() */
 	data->addr = addr;
 	data->raw  = NULL;
 	data->br_stack = NULL;
+	data->period	= period;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1204,20 +1205,13 @@
 
 extern struct static_key_deferred perf_sched_events;
 
-static inline void perf_event_task_sched_in(struct task_struct *prev,
+static inline void perf_event_task_sched(struct task_struct *prev,
 					    struct task_struct *task)
 {
-	if (static_key_false(&perf_sched_events.key))
-		__perf_event_task_sched_in(prev, task);
-}
-
-static inline void perf_event_task_sched_out(struct task_struct *prev,
-					     struct task_struct *next)
-{
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
 	if (static_key_false(&perf_sched_events.key))
-		__perf_event_task_sched_out(prev, next);
+		__perf_event_task_sched(prev, task);
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1292,11 +1286,8 @@
 extern void perf_event_task_tick(void);
 #else
 static inline void
-perf_event_task_sched_in(struct task_struct *prev,
-			 struct task_struct *task)			{ }
-static inline void
-perf_event_task_sched_out(struct task_struct *prev,
-			  struct task_struct *next)			{ }
+perf_event_task_sched(struct task_struct *prev,
+		      struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
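
With the third argument, overflow paths no longer patch data.period by
hand after the call; everything the sample needs up front is set in one
place. The resulting caller shape, as a minimal sketch with an
illustrative handler name:

	#include <linux/perf_event.h>

	/* Illustration: overflow path using the three-argument init. */
	static void pmu_handle_overflow(struct perf_event *event,
					struct pt_regs *regs)
	{
		struct perf_sample_data data;

		perf_sample_data_init(&data, 0, event->hw.last_period);

		if (perf_event_overflow(event, &data, regs))
			event->pmu->stop(event, 0);	/* throttle */
	}
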
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 7be2e88..6c8835f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -96,9 +96,11 @@
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+#define RING_BUFFER_ALL_CPUS -1
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu);
 
 void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
 
@@ -129,7 +131,7 @@
 void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
 int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
 
-unsigned long ring_buffer_size(struct ring_buffer *buffer);
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu);
 
 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_reset(struct ring_buffer *buffer);
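
ring_buffer_resize() and ring_buffer_size() now take a cpu argument so
individual per-cpu buffers can diverge in size; passing
RING_BUFFER_ALL_CPUS keeps the old uniform behaviour. A caller sketch
(the sizes are arbitrary):

	#include <linux/ring_buffer.h>

	/* Illustration: uniform resize vs. resizing one cpu's buffer. */
	static int tune_buffers(struct ring_buffer *buffer)
	{
		int ret;

		/* old behaviour: one size for every cpu */
		ret = ring_buffer_resize(buffer, 64 * 1024, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		/* new: give cpu 0 more room than the rest */
		return ring_buffer_resize(buffer, 256 * 1024, 0);
	}
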
diff --git a/init/Kconfig b/init/Kconfig
index 6cfd71d..7e37135 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1156,7 +1156,7 @@
 
 config PERF_EVENTS
 	bool "Kernel performance events and counters"
-	default y if (PROFILING || PERF_COUNTERS)
+	default y if PROFILING
 	depends on HAVE_PERF_EVENTS
 	select ANON_INODES
 	select IRQ_WORK
@@ -1183,18 +1183,6 @@
 
 	  Say Y if unsure.
 
-config PERF_COUNTERS
-	bool "Kernel performance counters (old config option)"
-	depends on HAVE_PERF_EVENTS
-	help
-	  This config has been obsoleted by the PERF_EVENTS
-	  config option - please see that one for details.
-
-	  It has no effect on the kernel whether you enable
-	  it or not, it is a compatibility placeholder.
-
-	  Say N if unsure.
-
 config DEBUG_PERF_USE_VMALLOC
 	default n
 	bool "Debug: use vmalloc to back perf mmap() buffers"
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fd126f8..91a4459 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2039,8 +2039,8 @@
  * accessing the event control register. If a NMI hits, then it will
  * not restart the event.
  */
-void __perf_event_task_sched_out(struct task_struct *task,
-				 struct task_struct *next)
+static void __perf_event_task_sched_out(struct task_struct *task,
+					struct task_struct *next)
 {
 	int ctxn;
 
@@ -2279,8 +2279,8 @@
  * accessing the event control register. If a NMI hits, then it will
  * keep the event running.
  */
-void __perf_event_task_sched_in(struct task_struct *prev,
-				struct task_struct *task)
+static void __perf_event_task_sched_in(struct task_struct *prev,
+				       struct task_struct *task)
 {
 	struct perf_event_context *ctx;
 	int ctxn;
@@ -2305,6 +2305,12 @@
 		perf_branch_stack_sched_in(prev, task);
 }
 
+void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next)
+{
+	__perf_event_task_sched_out(prev, next);
+	__perf_event_task_sched_in(prev, next);
+}
+
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	u64 frequency = event->attr.sample_freq;
@@ -4957,7 +4963,7 @@
 	if (rctx < 0)
 		return;
 
-	perf_sample_data_init(&data, addr);
+	perf_sample_data_init(&data, addr, 0);
 
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
 
@@ -5215,7 +5221,7 @@
 		.data = record,
 	};
 
-	perf_sample_data_init(&data, addr);
+	perf_sample_data_init(&data, addr, 0);
 	data.raw = &raw;
 
 	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
@@ -5318,7 +5324,7 @@
 	struct perf_sample_data sample;
 	struct pt_regs *regs = data;
 
-	perf_sample_data_init(&sample, bp->attr.bp_addr);
+	perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
 
 	if (!bp->hw.state && !perf_exclude_event(bp, regs))
 		perf_swevent_event(bp, 1, &sample, regs);
@@ -5344,13 +5350,12 @@
 
 	event->pmu->read(event);
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 	regs = get_irq_regs();
 
 	if (regs && !perf_exclude_event(event, regs)) {
 		if (!(event->attr.exclude_idle && is_idle_task(current)))
-			if (perf_event_overflow(event, &data, regs))
+			if (__perf_event_overflow(event, 1, &data, regs))
 				ret = HRTIMER_NORESTART;
 	}
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0533a68..13c3883 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1913,7 +1913,7 @@
 		    struct task_struct *next)
 {
 	sched_info_switch(prev, next);
-	perf_event_task_sched_out(prev, next);
+	perf_event_task_sched(prev, next);
 	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
@@ -1956,13 +1956,6 @@
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-	local_irq_disable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
-	perf_event_task_sched_in(prev, current);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-	local_irq_enable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
 	finish_arch_post_lock_switch();
 
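With the sched-out/sched-in pair collapsed into a single __perf_event_task_sched() called from prepare_task_switch(), the sched-in work now runs while interrupts are still disabled on the switch-out path, which is what allows the __ARCH_WANT_INTERRUPTS_ON_CTXSW irq-disable/enable dance above to be deleted. The header-side wrapper is not part of this diff; a plausible sketch, assuming it stays gated on the perf_sched_events jump label so the no-events case remains a NOP:

	static inline void perf_event_task_sched(struct task_struct *prev,
						 struct task_struct *next)
	{
		if (static_key_false(&perf_sched_events.key))
			__perf_event_task_sched(prev, next);
	}
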
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0fa92f6..cf81f27 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2469,57 +2469,35 @@
 ftrace_avail_open(struct inode *inode, struct file *file)
 {
 	struct ftrace_iterator *iter;
-	int ret;
 
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
-	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
-	if (!iter)
-		return -ENOMEM;
-
-	iter->pg = ftrace_pages_start;
-	iter->ops = &global_ops;
-
-	ret = seq_open(file, &show_ftrace_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = iter;
-	} else {
-		kfree(iter);
+	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+	if (iter) {
+		iter->pg = ftrace_pages_start;
+		iter->ops = &global_ops;
 	}
 
-	return ret;
+	return iter ? 0 : -ENOMEM;
 }
 
 static int
 ftrace_enabled_open(struct inode *inode, struct file *file)
 {
 	struct ftrace_iterator *iter;
-	int ret;
 
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
-	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
-	if (!iter)
-		return -ENOMEM;
-
-	iter->pg = ftrace_pages_start;
-	iter->flags = FTRACE_ITER_ENABLED;
-	iter->ops = &global_ops;
-
-	ret = seq_open(file, &show_ftrace_seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = iter;
-	} else {
-		kfree(iter);
+	iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+	if (iter) {
+		iter->pg = ftrace_pages_start;
+		iter->flags = FTRACE_ITER_ENABLED;
+		iter->ops = &global_ops;
 	}
 
-	return ret;
+	return iter ? 0 : -ENOMEM;
 }
 
 static void ftrace_filter_reset(struct ftrace_hash *hash)
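
Both conversions above replace the open-coded kzalloc() + seq_open() + m->private wiring with __seq_open_private(), which performs all three steps and leaves a single error path. The general pattern for any seq_file user looks like this (my_open, my_iter, and my_seq_ops are hypothetical names; the matching release side is seq_release_private()):

	static int my_open(struct inode *inode, struct file *file)
	{
		struct my_iter *iter;

		/* allocates iter zeroed, calls seq_open(file, &my_seq_ops) and
		 * stores iter in ((struct seq_file *)file->private_data)->private */
		iter = __seq_open_private(file, &my_seq_ops, sizeof(*iter));
		if (!iter)
			return -ENOMEM;

		iter->pos = 0;		/* any extra iterator setup goes here */
		return 0;
	}
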
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e..2d5eb33 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -449,6 +449,7 @@
 	raw_spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
+	unsigned int			nr_pages;
 	struct list_head		*pages;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
@@ -466,10 +467,12 @@
 	unsigned long			read_bytes;
 	u64				write_stamp;
 	u64				read_stamp;
+	/* ring buffer pages to update, > 0 to add, < 0 to remove */
+	int				nr_pages_to_update;
+	struct list_head		new_pages; /* new pages to add */
 };
 
 struct ring_buffer {
-	unsigned			pages;
 	unsigned			flags;
 	int				cpus;
 	atomic_t			record_disabled;
@@ -963,14 +966,10 @@
 	return 0;
 }
 
-static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
-			     unsigned nr_pages)
+static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
 {
+	int i;
 	struct buffer_page *bpage, *tmp;
-	LIST_HEAD(pages);
-	unsigned i;
-
-	WARN_ON(!nr_pages);
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
@@ -981,15 +980,13 @@
 		 */
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 				    GFP_KERNEL | __GFP_NORETRY,
-				    cpu_to_node(cpu_buffer->cpu));
+				    cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
 
-		rb_check_bpage(cpu_buffer, bpage);
+		list_add(&bpage->list, pages);
 
-		list_add(&bpage->list, &pages);
-
-		page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+		page = alloc_pages_node(cpu_to_node(cpu),
 					GFP_KERNEL | __GFP_NORETRY, 0);
 		if (!page)
 			goto free_pages;
@@ -997,6 +994,27 @@
 		rb_init_page(bpage->page);
 	}
 
+	return 0;
+
+free_pages:
+	list_for_each_entry_safe(bpage, tmp, pages, list) {
+		list_del_init(&bpage->list);
+		free_buffer_page(bpage);
+	}
+
+	return -ENOMEM;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	LIST_HEAD(pages);
+
+	WARN_ON(!nr_pages);
+
+	if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+		return -ENOMEM;
+
 	/*
 	 * The ring buffer page list is a circular list that does not
 	 * start and end with a list head. All page list items point to
@@ -1005,20 +1023,15 @@
 	cpu_buffer->pages = pages.next;
 	list_del(&pages);
 
+	cpu_buffer->nr_pages = nr_pages;
+
 	rb_check_pages(cpu_buffer);
 
 	return 0;
-
- free_pages:
-	list_for_each_entry_safe(bpage, tmp, &pages, list) {
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
-	}
-	return -ENOMEM;
 }
 
 static struct ring_buffer_per_cpu *
-rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct buffer_page *bpage;
@@ -1052,7 +1065,7 @@
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 
-	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+	ret = rb_allocate_pages(cpu_buffer, nr_pages);
 	if (ret < 0)
 		goto fail_free_reader;
 
@@ -1113,7 +1126,7 @@
 {
 	struct ring_buffer *buffer;
 	int bsize;
-	int cpu;
+	int cpu, nr_pages;
 
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1137,14 @@
 	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
 		goto fail_free_buffer;
 
-	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
-	if (buffer->pages < 2)
-		buffer->pages = 2;
+	if (nr_pages < 2)
+		nr_pages = 2;
 
 	/*
 	 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1167,7 @@
 
 	for_each_buffer_cpu(buffer, cpu) {
 		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, cpu);
+			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 		if (!buffer->buffers[cpu])
 			goto fail_free_buffers;
 	}
@@ -1276,6 +1289,18 @@
 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
+static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	if (cpu_buffer->nr_pages_to_update > 0)
+		rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
+				cpu_buffer->nr_pages_to_update);
+	else
+		rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
+	cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
+	/* reset this value */
+	cpu_buffer->nr_pages_to_update = 0;
+}
+
 /**
  * ring_buffer_resize - resize the ring buffer
  * @buffer: the buffer to resize.
@@ -1285,14 +1310,12 @@
  *
  * Returns -1 on failure.
  */
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+			int cpu_id)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	unsigned nr_pages, rm_pages, new_pages;
-	struct buffer_page *bpage, *tmp;
-	unsigned long buffer_size;
-	LIST_HEAD(pages);
-	int i, cpu;
+	unsigned nr_pages;
+	int cpu;
 
 	/*
 	 * Always succeed at resizing a non-existent buffer:
@@ -1302,15 +1325,11 @@
 
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
-	buffer_size = buffer->pages * BUF_PAGE_SIZE;
 
 	/* we need a minimum of two pages */
 	if (size < BUF_PAGE_SIZE * 2)
 		size = BUF_PAGE_SIZE * 2;
 
-	if (size == buffer_size)
-		return size;
-
 	atomic_inc(&buffer->record_disabled);
 
 	/* Make sure all writers are done with this buffer. */
@@ -1321,68 +1340,56 @@
 
 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
-	if (size < buffer_size) {
-
-		/* easy case, just free pages */
-		if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
-			goto out_fail;
-
-		rm_pages = buffer->pages - nr_pages;
-
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		/* calculate the pages to update */
 		for_each_buffer_cpu(buffer, cpu) {
 			cpu_buffer = buffer->buffers[cpu];
-			rb_remove_pages(cpu_buffer, rm_pages);
-		}
-		goto out;
-	}
 
-	/*
-	 * This is a bit more difficult. We only want to add pages
-	 * when we can allocate enough for all CPUs. We do this
-	 * by allocating all the pages and storing them on a local
-	 * link list. If we succeed in our allocation, then we
-	 * add these pages to the cpu_buffers. Otherwise we just free
-	 * them all and return -ENOMEM;
-	 */
-	if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
-		goto out_fail;
+			cpu_buffer->nr_pages_to_update = nr_pages -
+							cpu_buffer->nr_pages;
 
-	new_pages = nr_pages - buffer->pages;
-
-	for_each_buffer_cpu(buffer, cpu) {
-		for (i = 0; i < new_pages; i++) {
-			struct page *page;
 			/*
-			 * __GFP_NORETRY flag makes sure that the allocation
-			 * fails gracefully without invoking oom-killer and
-			 * the system is not destabilized.
+			 * nothing more to do when removing pages or there is no update
 			 */
-			bpage = kzalloc_node(ALIGN(sizeof(*bpage),
-						  cache_line_size()),
-					    GFP_KERNEL | __GFP_NORETRY,
-					    cpu_to_node(cpu));
-			if (!bpage)
-				goto free_pages;
-			list_add(&bpage->list, &pages);
-			page = alloc_pages_node(cpu_to_node(cpu),
-						GFP_KERNEL | __GFP_NORETRY, 0);
-			if (!page)
-				goto free_pages;
-			bpage->page = page_address(page);
-			rb_init_page(bpage->page);
+			if (cpu_buffer->nr_pages_to_update <= 0)
+				continue;
+
+			/*
+			 * to add pages, make sure all new pages can be
+			 * allocated without receiving ENOMEM
+			 */
+			INIT_LIST_HEAD(&cpu_buffer->new_pages);
+			if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+						&cpu_buffer->new_pages, cpu))
+				/* not enough memory for new pages */
+				goto no_mem;
 		}
-	}
 
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		rb_insert_pages(cpu_buffer, &pages, new_pages);
-	}
+		/* perform all the updates */
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			if (cpu_buffer->nr_pages_to_update)
+				update_pages_handler(cpu_buffer);
+		}
+	} else {
+		cpu_buffer = buffer->buffers[cpu_id];
+		if (nr_pages == cpu_buffer->nr_pages)
+			goto out;
 
-	if (RB_WARN_ON(buffer, !list_empty(&pages)))
-		goto out_fail;
+		cpu_buffer->nr_pages_to_update = nr_pages -
+						cpu_buffer->nr_pages;
+
+		INIT_LIST_HEAD(&cpu_buffer->new_pages);
+		if (cpu_buffer->nr_pages_to_update > 0 &&
+			__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+						&cpu_buffer->new_pages, cpu_id))
+			goto no_mem;
+
+		update_pages_handler(cpu_buffer);
+	}
 
  out:
-	buffer->pages = nr_pages;
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
 
@@ -1390,25 +1397,24 @@
 
 	return size;
 
- free_pages:
-	list_for_each_entry_safe(bpage, tmp, &pages, list) {
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
+ no_mem:
+	for_each_buffer_cpu(buffer, cpu) {
+		struct buffer_page *bpage, *tmp;
+		cpu_buffer = buffer->buffers[cpu];
+		/* reset this number regardless */
+		cpu_buffer->nr_pages_to_update = 0;
+		if (list_empty(&cpu_buffer->new_pages))
+			continue;
+		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+					list) {
+			list_del_init(&bpage->list);
+			free_buffer_page(bpage);
+		}
 	}
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
 	atomic_dec(&buffer->record_disabled);
 	return -ENOMEM;
-
-	/*
-	 * Something went totally wrong, and we are too paranoid
-	 * to even clean up the mess.
-	 */
- out_fail:
-	put_online_cpus();
-	mutex_unlock(&buffer->mutex);
-	atomic_dec(&buffer->record_disabled);
-	return -1;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
 
@@ -1510,7 +1516,7 @@
 	 * assign the commit to the tail.
 	 */
  again:
-	max_count = cpu_buffer->buffer->pages * 100;
+	max_count = cpu_buffer->nr_pages * 100;
 
 	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
 		if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3588,9 +3594,18 @@
  * ring_buffer_size - return the size of the ring buffer (in bytes)
  * @buffer: The ring buffer.
  */
-unsigned long ring_buffer_size(struct ring_buffer *buffer)
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
 {
-	return BUF_PAGE_SIZE * buffer->pages;
+	/*
+	 * Earlier, this function returned
+	 *	BUF_PAGE_SIZE * buffer->pages
+	 * Since that buffer-wide pages field has been removed, this now
+	 * returns the per cpu buffer value.
+	 */
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_size);
 
@@ -3765,8 +3780,11 @@
 	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
 		goto out;
 
+	cpu_buffer_a = buffer_a->buffers[cpu];
+	cpu_buffer_b = buffer_b->buffers[cpu];
+
 	/* At least make sure the two buffers are somewhat the same */
-	if (buffer_a->pages != buffer_b->pages)
+	if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
 		goto out;
 
 	ret = -EAGAIN;
@@ -3780,9 +3798,6 @@
 	if (atomic_read(&buffer_b->record_disabled))
 		goto out;
 
-	cpu_buffer_a = buffer_a->buffers[cpu];
-	cpu_buffer_b = buffer_b->buffers[cpu];
-
 	if (atomic_read(&cpu_buffer_a->record_disabled))
 		goto out;
 
@@ -4071,6 +4086,8 @@
 	struct ring_buffer *buffer =
 		container_of(self, struct ring_buffer, cpu_notify);
 	long cpu = (long)hcpu;
+	int cpu_i, nr_pages_same;
+	unsigned int nr_pages;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -4078,8 +4095,23 @@
 		if (cpumask_test_cpu(cpu, buffer->cpumask))
 			return NOTIFY_OK;
 
+		nr_pages = 0;
+		nr_pages_same = 1;
+		/* check if all cpu sizes are same */
+		for_each_buffer_cpu(buffer, cpu_i) {
+			/* fill in the size from first enabled cpu */
+			if (nr_pages == 0)
+				nr_pages = buffer->buffers[cpu_i]->nr_pages;
+			if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+				nr_pages_same = 0;
+				break;
+			}
+		}
+		/* allocate minimum pages, user can later expand it */
+		if (!nr_pages_same)
+			nr_pages = 2;
 		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, cpu);
+			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 		if (!buffer->buffers[cpu]) {
 			WARN(1, "failed to allocate ring buffer on CPU %ld\n",
 			     cpu);
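
The resize rework above turns ring_buffer_resize() into a per-cpu operation with a two-phase protocol: new pages for every affected CPU are first allocated onto cpu_buffer->new_pages, and only if every allocation succeeds are they spliced in by update_pages_handler(), so an ENOMEM can be unwound without ever touching a live buffer. Callers name a single CPU or pass RING_BUFFER_ALL_CPUS for the old whole-buffer behaviour; a usage sketch, where buffer, buf_size, and ret are illustrative locals:

	/* grow only CPU 2's buffer to 1 MB, leaving the others alone */
	ret = ring_buffer_resize(buffer, 1024 * 1024, 2);
	if (ret < 0)
		pr_warn("per-cpu resize failed: %d\n", ret);

	/* resize every per-cpu buffer, matching the old semantics */
	ret = ring_buffer_resize(buffer, buf_size, RING_BUFFER_ALL_CPUS);
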
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a22255..509e861 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,18 +87,6 @@
 
 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
 
-static inline void ftrace_disable_cpu(void)
-{
-	preempt_disable();
-	__this_cpu_inc(ftrace_cpu_disabled);
-}
-
-static inline void ftrace_enable_cpu(void)
-{
-	__this_cpu_dec(ftrace_cpu_disabled);
-	preempt_enable();
-}
-
 cpumask_var_t __read_mostly	tracing_buffer_mask;
 
 /*
@@ -629,7 +617,6 @@
 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 {
 	int len;
-	void *ret;
 
 	if (s->len <= s->readpos)
 		return -EBUSY;
@@ -637,9 +624,7 @@
 	len = s->len - s->readpos;
 	if (cnt > len)
 		cnt = len;
-	ret = memcpy(buf, s->buffer + s->readpos, cnt);
-	if (!ret)
-		return -EFAULT;
+	memcpy(buf, s->buffer + s->readpos, cnt);
 
 	s->readpos += cnt;
 	return cnt;
@@ -751,8 +736,6 @@
 
 	arch_spin_lock(&ftrace_max_lock);
 
-	ftrace_disable_cpu();
-
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
 	if (ret == -EBUSY) {
@@ -766,8 +749,6 @@
 			"Failed to swap buffers due to commit in progress\n");
 	}
 
-	ftrace_enable_cpu();
-
 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -841,7 +822,8 @@
 
 		/* If we expanded the buffers, make sure the max is expanded too */
 		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, trace_buf_size);
+			ring_buffer_resize(max_tr.buffer, trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 
 		/* the test is responsible for initializing and enabling */
 		pr_info("Testing tracer %s: ", type->name);
@@ -857,7 +839,8 @@
 
 		/* Shrink the max buffer again */
 		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, 1);
+			ring_buffer_resize(max_tr.buffer, 1,
+						RING_BUFFER_ALL_CPUS);
 
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -917,13 +900,6 @@
 	mutex_unlock(&trace_types_lock);
 }
 
-static void __tracing_reset(struct ring_buffer *buffer, int cpu)
-{
-	ftrace_disable_cpu();
-	ring_buffer_reset_cpu(buffer, cpu);
-	ftrace_enable_cpu();
-}
-
 void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
@@ -932,7 +908,7 @@
 
 	/* Make sure all commits have finished */
 	synchronize_sched();
-	__tracing_reset(buffer, cpu);
+	ring_buffer_reset_cpu(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -950,7 +926,7 @@
 	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
-		__tracing_reset(buffer, cpu);
+		ring_buffer_reset_cpu(buffer, cpu);
 
 	ring_buffer_record_enable(buffer);
 }
@@ -1498,25 +1474,119 @@
 
 #endif /* CONFIG_STACKTRACE */
 
+/* created for use with alloc_percpu */
+struct trace_buffer_struct {
+	char buffer[TRACE_BUF_SIZE];
+};
+
+static struct trace_buffer_struct *trace_percpu_buffer;
+static struct trace_buffer_struct *trace_percpu_sirq_buffer;
+static struct trace_buffer_struct *trace_percpu_irq_buffer;
+static struct trace_buffer_struct *trace_percpu_nmi_buffer;
+
+/*
+ * The buffer used is dependent on the context. There is a per cpu
+ * buffer for normal context, softirq context, hard irq context and
+ * for NMI context. This allows for lockless recording.
+ *
+ * Note, if the buffers fail to be allocated, this returns NULL
+ */
+static char *get_trace_buf(void)
+{
+	struct trace_buffer_struct *percpu_buffer;
+	struct trace_buffer_struct *buffer;
+
+	/*
+	 * If we have allocated per cpu buffers, then we do not
+	 * need to do any locking.
+	 */
+	if (in_nmi())
+		percpu_buffer = trace_percpu_nmi_buffer;
+	else if (in_irq())
+		percpu_buffer = trace_percpu_irq_buffer;
+	else if (in_softirq())
+		percpu_buffer = trace_percpu_sirq_buffer;
+	else
+		percpu_buffer = trace_percpu_buffer;
+
+	if (!percpu_buffer)
+		return NULL;
+
+	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
+
+	return buffer->buffer;
+}
+
+static int alloc_percpu_trace_buffer(void)
+{
+	struct trace_buffer_struct *buffers;
+	struct trace_buffer_struct *sirq_buffers;
+	struct trace_buffer_struct *irq_buffers;
+	struct trace_buffer_struct *nmi_buffers;
+
+	buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!buffers)
+		goto err_warn;
+
+	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!sirq_buffers)
+		goto err_sirq;
+
+	irq_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!irq_buffers)
+		goto err_irq;
+
+	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
+	if (!nmi_buffers)
+		goto err_nmi;
+
+	trace_percpu_buffer = buffers;
+	trace_percpu_sirq_buffer = sirq_buffers;
+	trace_percpu_irq_buffer = irq_buffers;
+	trace_percpu_nmi_buffer = nmi_buffers;
+
+	return 0;
+
+ err_nmi:
+	free_percpu(irq_buffers);
+ err_irq:
+	free_percpu(sirq_buffers);
+ err_sirq:
+	free_percpu(buffers);
+ err_warn:
+	WARN(1, "Could not allocate percpu trace_printk buffer");
+	return -ENOMEM;
+}
+
+void trace_printk_init_buffers(void)
+{
+	static int buffers_allocated;
+
+	if (buffers_allocated)
+		return;
+
+	if (alloc_percpu_trace_buffer())
+		return;
+
+	pr_info("ftrace: Allocated trace_printk buffers\n");
+
+	buffers_allocated = 1;
+}
+
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
  */
 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 {
-	static arch_spinlock_t trace_buf_lock =
-		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-	static u32 trace_buf[TRACE_BUF_SIZE];
-
 	struct ftrace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
 	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
 	struct bprint_entry *entry;
 	unsigned long flags;
-	int disable;
-	int cpu, len = 0, size, pc;
+	char *tbuffer;
+	int len = 0, size, pc;
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -1526,43 +1596,36 @@
 
 	pc = preempt_count();
 	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
 
-	disable = atomic_inc_return(&data->disabled);
-	if (unlikely(disable != 1))
+	tbuffer = get_trace_buf();
+	if (!tbuffer) {
+		len = 0;
+		goto out;
+	}
+
+	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
+
+	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
 		goto out;
 
-	/* Lockdep uses trace_printk for lock tracing */
-	local_irq_save(flags);
-	arch_spin_lock(&trace_buf_lock);
-	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
-
-	if (len > TRACE_BUF_SIZE || len < 0)
-		goto out_unlock;
-
+	local_save_flags(flags);
 	size = sizeof(*entry) + sizeof(u32) * len;
 	buffer = tr->buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
 					  flags, pc);
 	if (!event)
-		goto out_unlock;
+		goto out;
 	entry = ring_buffer_event_data(event);
 	entry->ip			= ip;
 	entry->fmt			= fmt;
 
-	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
 	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
 
-out_unlock:
-	arch_spin_unlock(&trace_buf_lock);
-	local_irq_restore(flags);
-
 out:
-	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
 	unpause_graph_tracing();
 
@@ -1588,58 +1651,53 @@
 int trace_array_vprintk(struct trace_array *tr,
 			unsigned long ip, const char *fmt, va_list args)
 {
-	static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-	static char trace_buf[TRACE_BUF_SIZE];
-
 	struct ftrace_event_call *call = &event_print;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
-	struct trace_array_cpu *data;
-	int cpu, len = 0, size, pc;
+	int len = 0, size, pc;
 	struct print_entry *entry;
-	unsigned long irq_flags;
-	int disable;
+	unsigned long flags;
+	char *tbuffer;
 
 	if (tracing_disabled || tracing_selftest_running)
 		return 0;
 
+	/* Don't pollute graph traces with trace_vprintk internals */
+	pause_graph_tracing();
+
 	pc = preempt_count();
 	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
 
-	disable = atomic_inc_return(&data->disabled);
-	if (unlikely(disable != 1))
+
+	tbuffer = get_trace_buf();
+	if (!tbuffer) {
+		len = 0;
+		goto out;
+	}
+
+	len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
+	if (len > TRACE_BUF_SIZE)
 		goto out;
 
-	pause_graph_tracing();
-	raw_local_irq_save(irq_flags);
-	arch_spin_lock(&trace_buf_lock);
-	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
-
+	local_save_flags(flags);
 	size = sizeof(*entry) + len + 1;
 	buffer = tr->buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-					  irq_flags, pc);
+					  flags, pc);
 	if (!event)
-		goto out_unlock;
+		goto out;
 	entry = ring_buffer_event_data(event);
 	entry->ip = ip;
 
-	memcpy(&entry->buf, trace_buf, len);
+	memcpy(&entry->buf, tbuffer, len);
 	entry->buf[len] = '\0';
 	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
-		ftrace_trace_stack(buffer, irq_flags, 6, pc);
+		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
-
- out_unlock:
-	arch_spin_unlock(&trace_buf_lock);
-	raw_local_irq_restore(irq_flags);
-	unpause_graph_tracing();
  out:
-	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
+	unpause_graph_tracing();
 
 	return len;
 }
@@ -1652,14 +1710,9 @@
 
 static void trace_iterator_increment(struct trace_iterator *iter)
 {
-	/* Don't allow ftrace to trace into the ring buffers */
-	ftrace_disable_cpu();
-
 	iter->idx++;
 	if (iter->buffer_iter[iter->cpu])
 		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
-
-	ftrace_enable_cpu();
 }
 
 static struct trace_entry *
@@ -1669,17 +1722,12 @@
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	/* Don't allow ftrace to trace into the ring buffers */
-	ftrace_disable_cpu();
-
 	if (buf_iter)
 		event = ring_buffer_iter_peek(buf_iter, ts);
 	else
 		event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
 					 lost_events);
 
-	ftrace_enable_cpu();
-
 	if (event) {
 		iter->ent_size = ring_buffer_event_length(event);
 		return ring_buffer_event_data(event);
@@ -1769,11 +1817,8 @@
 
 static void trace_consume(struct trace_iterator *iter)
 {
-	/* Don't allow ftrace to trace into the ring buffers */
-	ftrace_disable_cpu();
 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
 			    &iter->lost_events);
-	ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1862,16 +1907,12 @@
 		iter->cpu = 0;
 		iter->idx = -1;
 
-		ftrace_disable_cpu();
-
 		if (cpu_file == TRACE_PIPE_ALL_CPU) {
 			for_each_tracing_cpu(cpu)
 				tracing_iter_reset(iter, cpu);
 		} else
 			tracing_iter_reset(iter, cpu_file);
 
-		ftrace_enable_cpu();
-
 		iter->leftover = 0;
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
@@ -2332,15 +2373,13 @@
 __tracing_open(struct inode *inode, struct file *file)
 {
 	long cpu_file = (long) inode->i_private;
-	void *fail_ret = ERR_PTR(-ENOMEM);
 	struct trace_iterator *iter;
-	struct seq_file *m;
-	int cpu, ret;
+	int cpu;
 
 	if (tracing_disabled)
 		return ERR_PTR(-ENODEV);
 
-	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
 	if (!iter)
 		return ERR_PTR(-ENOMEM);
 
@@ -2397,32 +2436,15 @@
 		tracing_iter_reset(iter, cpu);
 	}
 
-	ret = seq_open(file, &tracer_seq_ops);
-	if (ret < 0) {
-		fail_ret = ERR_PTR(ret);
-		goto fail_buffer;
-	}
-
-	m = file->private_data;
-	m->private = iter;
-
 	mutex_unlock(&trace_types_lock);
 
 	return iter;
 
- fail_buffer:
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
-	free_cpumask_var(iter->started);
-	tracing_start();
  fail:
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
-	kfree(iter);
-
-	return fail_ret;
+	seq_release_private(inode, file);
+	return ERR_PTR(-ENOMEM);
 }
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2458,11 +2480,10 @@
 	tracing_start();
 	mutex_unlock(&trace_types_lock);
 
-	seq_release(inode, file);
 	mutex_destroy(&iter->mutex);
 	free_cpumask_var(iter->started);
 	kfree(iter->trace);
-	kfree(iter);
+	seq_release_private(inode, file);
 	return 0;
 }
 
@@ -2974,7 +2995,14 @@
 	return t->init(tr);
 }
 
-static int __tracing_resize_ring_buffer(unsigned long size)
+static void set_buffer_entries(struct trace_array *tr, unsigned long val)
+{
+	int cpu;
+	for_each_tracing_cpu(cpu)
+		tr->data[cpu]->entries = val;
+}
+
+static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 {
 	int ret;
 
@@ -2985,19 +3013,32 @@
 	 */
 	ring_buffer_expanded = 1;
 
-	ret = ring_buffer_resize(global_trace.buffer, size);
+	ret = ring_buffer_resize(global_trace.buffer, size, cpu);
 	if (ret < 0)
 		return ret;
 
 	if (!current_trace->use_max_tr)
 		goto out;
 
-	ret = ring_buffer_resize(max_tr.buffer, size);
+	ret = ring_buffer_resize(max_tr.buffer, size, cpu);
 	if (ret < 0) {
-		int r;
+		int r = 0;
 
-		r = ring_buffer_resize(global_trace.buffer,
-				       global_trace.entries);
+		if (cpu == RING_BUFFER_ALL_CPUS) {
+			int i;
+			for_each_tracing_cpu(i) {
+				r = ring_buffer_resize(global_trace.buffer,
+						global_trace.data[i]->entries,
+						i);
+				if (r < 0)
+					break;
+			}
+		} else {
+			r = ring_buffer_resize(global_trace.buffer,
+						global_trace.data[cpu]->entries,
+						cpu);
+		}
+
 		if (r < 0) {
 			/*
 			 * AARGH! We are left with different
@@ -3019,14 +3060,21 @@
 		return ret;
 	}
 
-	max_tr.entries = size;
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		set_buffer_entries(&max_tr, size);
+	else
+		max_tr.data[cpu]->entries = size;
+
  out:
-	global_trace.entries = size;
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		set_buffer_entries(&global_trace, size);
+	else
+		global_trace.data[cpu]->entries = size;
 
 	return ret;
 }
 
-static ssize_t tracing_resize_ring_buffer(unsigned long size)
+static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
 {
 	int cpu, ret = size;
 
@@ -3042,12 +3090,19 @@
 			atomic_inc(&max_tr.data[cpu]->disabled);
 	}
 
-	if (size != global_trace.entries)
-		ret = __tracing_resize_ring_buffer(size);
+	if (cpu_id != RING_BUFFER_ALL_CPUS) {
+		/* make sure this cpu is enabled in the mask */
+		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
 
+	ret = __tracing_resize_ring_buffer(size, cpu_id);
 	if (ret < 0)
 		ret = -ENOMEM;
 
+out:
 	for_each_tracing_cpu(cpu) {
 		if (global_trace.data[cpu])
 			atomic_dec(&global_trace.data[cpu]->disabled);
@@ -3078,7 +3133,8 @@
 
 	mutex_lock(&trace_types_lock);
 	if (!ring_buffer_expanded)
-		ret = __tracing_resize_ring_buffer(trace_buf_size);
+		ret = __tracing_resize_ring_buffer(trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 	mutex_unlock(&trace_types_lock);
 
 	return ret;
@@ -3102,7 +3158,8 @@
 	mutex_lock(&trace_types_lock);
 
 	if (!ring_buffer_expanded) {
-		ret = __tracing_resize_ring_buffer(trace_buf_size);
+		ret = __tracing_resize_ring_buffer(trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
 		ret = 0;
@@ -3128,8 +3185,8 @@
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
 		 * we want preserve it.
 		 */
-		ring_buffer_resize(max_tr.buffer, 1);
-		max_tr.entries = 1;
+		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
+		set_buffer_entries(&max_tr, 1);
 	}
 	destroy_trace_option_files(topts);
 
@@ -3137,10 +3194,17 @@
 
 	topts = create_trace_option_files(current_trace);
 	if (current_trace->use_max_tr) {
-		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
-		if (ret < 0)
-			goto out;
-		max_tr.entries = global_trace.entries;
+		int cpu;
+		/* we need to make per cpu buffer sizes equivalent */
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(max_tr.buffer,
+						global_trace.data[cpu]->entries,
+						cpu);
+			if (ret < 0)
+				goto out;
+			max_tr.data[cpu]->entries =
+					global_trace.data[cpu]->entries;
+		}
 	}
 
 	if (t->init) {
@@ -3642,30 +3706,82 @@
 	goto out;
 }
 
+struct ftrace_entries_info {
+	struct trace_array	*tr;
+	int			cpu;
+};
+
+static int tracing_entries_open(struct inode *inode, struct file *filp)
+{
+	struct ftrace_entries_info *info;
+
+	if (tracing_disabled)
+		return -ENODEV;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->tr = &global_trace;
+	info->cpu = (unsigned long)inode->i_private;
+
+	filp->private_data = info;
+
+	return 0;
+}
+
 static ssize_t
 tracing_entries_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
-	struct trace_array *tr = filp->private_data;
-	char buf[96];
-	int r;
+	struct ftrace_entries_info *info = filp->private_data;
+	struct trace_array *tr = info->tr;
+	char buf[64];
+	int r = 0;
+	ssize_t ret;
 
 	mutex_lock(&trace_types_lock);
-	if (!ring_buffer_expanded)
-		r = sprintf(buf, "%lu (expanded: %lu)\n",
-			    tr->entries >> 10,
-			    trace_buf_size >> 10);
-	else
-		r = sprintf(buf, "%lu\n", tr->entries >> 10);
+
+	if (info->cpu == RING_BUFFER_ALL_CPUS) {
+		int cpu, buf_size_same;
+		unsigned long size;
+
+		size = 0;
+		buf_size_same = 1;
+		/* check if all cpu sizes are same */
+		for_each_tracing_cpu(cpu) {
+			/* fill in the size from first enabled cpu */
+			if (size == 0)
+				size = tr->data[cpu]->entries;
+			if (size != tr->data[cpu]->entries) {
+				buf_size_same = 0;
+				break;
+			}
+		}
+
+		if (buf_size_same) {
+			if (!ring_buffer_expanded)
+				r = sprintf(buf, "%lu (expanded: %lu)\n",
+					    size >> 10,
+					    trace_buf_size >> 10);
+			else
+				r = sprintf(buf, "%lu\n", size >> 10);
+		} else
+			r = sprintf(buf, "X\n");
+	} else
+		r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
+
 	mutex_unlock(&trace_types_lock);
 
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+	return ret;
 }
 
 static ssize_t
 tracing_entries_write(struct file *filp, const char __user *ubuf,
 		      size_t cnt, loff_t *ppos)
 {
+	struct ftrace_entries_info *info = filp->private_data;
 	unsigned long val;
 	int ret;
 
@@ -3680,7 +3796,7 @@
 	/* value is in KB */
 	val <<= 10;
 
-	ret = tracing_resize_ring_buffer(val);
+	ret = tracing_resize_ring_buffer(val, info->cpu);
 	if (ret < 0)
 		return ret;
 
@@ -3689,6 +3805,16 @@
 	return cnt;
 }
 
+static int
+tracing_entries_release(struct inode *inode, struct file *filp)
+{
+	struct ftrace_entries_info *info = filp->private_data;
+
+	kfree(info);
+
+	return 0;
+}
+
 static ssize_t
 tracing_total_entries_read(struct file *filp, char __user *ubuf,
 				size_t cnt, loff_t *ppos)
@@ -3700,7 +3826,7 @@
 
 	mutex_lock(&trace_types_lock);
 	for_each_tracing_cpu(cpu) {
-		size += tr->entries >> 10;
+		size += tr->data[cpu]->entries >> 10;
 		if (!ring_buffer_expanded)
 			expanded_size += trace_buf_size >> 10;
 	}
@@ -3734,7 +3860,7 @@
 	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
 		tracing_off();
 	/* resize the ring buffer to 0 */
-	tracing_resize_ring_buffer(0);
+	tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
 
 	return 0;
 }
@@ -3933,9 +4059,10 @@
 };
 
 static const struct file_operations tracing_entries_fops = {
-	.open		= tracing_open_generic,
+	.open		= tracing_entries_open,
 	.read		= tracing_entries_read,
 	.write		= tracing_entries_write,
+	.release	= tracing_entries_release,
 	.llseek		= generic_file_llseek,
 };
 
@@ -4387,6 +4514,9 @@
 
 	trace_create_file("stats", 0444, d_cpu,
 			(void *) cpu, &tracing_stats_fops);
+
+	trace_create_file("buffer_size_kb", 0444, d_cpu,
+			(void *) cpu, &tracing_entries_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -4718,7 +4848,7 @@
 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
 
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
-			&global_trace, &tracing_entries_fops);
+			(void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
 
 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
 			&global_trace, &tracing_total_entries_fops);
@@ -4957,6 +5087,10 @@
 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
+	/* Only allocate trace_printk buffers if a trace_printk exists */
+	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+		trace_printk_init_buffers();
+
 	/* To save memory, keep the ring buffer size to its minimum */
 	if (ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;
@@ -4975,7 +5109,6 @@
 		WARN_ON(1);
 		goto out_free_cpumask;
 	}
-	global_trace.entries = ring_buffer_size(global_trace.buffer);
 	if (global_trace.buffer_disabled)
 		tracing_off();
 
@@ -4988,7 +5121,6 @@
 		ring_buffer_free(global_trace.buffer);
 		goto out_free_cpumask;
 	}
-	max_tr.entries = 1;
 #endif
 
 	/* Allocate the first page for all buffers */
@@ -4997,6 +5129,11 @@
 		max_tr.data[i] = &per_cpu(max_tr_data, i);
 	}
 
+	set_buffer_entries(&global_trace, ring_buf_size);
+#ifdef CONFIG_TRACER_MAX_TRACE
+	set_buffer_entries(&max_tr, 1);
+#endif
+
 	trace_init_cmdlines();
 
 	register_tracer(&nop_trace);
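
The trace_printk path above drops the global arch_spin_lock'ed scratch buffer in favour of four percpu buffers, one per context level (task, softirq, hardirq, NMI), so a nested interrupt can never scribble over the buffer the interrupted context is still formatting into. The only contract left on callers is that preemption is disabled around the use, since get_trace_buf() resolves a CPU-local pointer; a sketch of that calling pattern, where tbuffer, len, fmt, and args are locals of the surrounding function:

	preempt_disable_notrace();

	tbuffer = get_trace_buf();	/* NULL if buffers were never allocated */
	if (tbuffer)
		len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

	preempt_enable_notrace();
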
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f95d65d..6c6f793 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -131,6 +131,7 @@
 	atomic_t		disabled;
 	void			*buffer_page;	/* ring buffer spare */
 
+	unsigned long		entries;
 	unsigned long		saved_latency;
 	unsigned long		critical_start;
 	unsigned long		critical_end;
@@ -152,7 +153,6 @@
  */
 struct trace_array {
 	struct ring_buffer	*buffer;
-	unsigned long		entries;
 	int			cpu;
 	int			buffer_disabled;
 	cycle_t			time_start;
@@ -826,6 +826,8 @@
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
+void trace_printk_init_buffers(void);
+
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\
 	extern struct ftrace_event_call					\
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 6fd4ffd..a9077c1 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -51,6 +51,10 @@
 	const char **iter;
 	char *fmt;
 
+	/* allocate the trace_printk per cpu buffers */
+	if (start != end)
+		trace_printk_init_buffers();
+
 	mutex_lock(&btrace_mutex);
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
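
The hook above makes the percpu buffers lazily allocated: they exist only if at least one trace_printk() user was built in, or is being loaded as a module, detected by the linker-provided section bounds being unequal. A sketch of that emptiness test, with trace_printk_used() as a hypothetical helper name:

	extern const char *__start___trace_bprintk_fmt[];
	extern const char *__stop___trace_bprintk_fmt[];

	/* the __trace_bprintk_fmt section holds one format pointer per
	 * trace_printk() call site; equal start/stop bounds mean the
	 * section is empty and no buffers are needed */
	static bool trace_printk_used(void)
	{
		return __start___trace_bprintk_fmt != __stop___trace_bprintk_fmt;
	}
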
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644
index 0000000..3ae4394
--- /dev/null
+++ b/tools/Makefile
@@ -0,0 +1,77 @@
+include scripts/Makefile.include
+
+help:
+	@echo 'Possible targets:'
+	@echo ''
+	@echo '  cpupower   - a tool for all things x86 CPU power'
+	@echo '  firewire   - the userspace part of nosy, an IEEE-1394 traffic sniffer'
+	@echo '  lguest     - a minimal 32-bit x86 hypervisor'
+	@echo '  perf       - Linux performance measurement and analysis tool'
+	@echo '  selftests  - various kernel selftests'
+	@echo '  turbostat  - Intel CPU idle stats and freq reporting tool'
+	@echo '  usb        - USB testing tools'
+	@echo '  virtio     - vhost test module'
+	@echo '  vm         - misc vm tools'
+	@echo '  x86_energy_perf_policy - Intel energy policy tool'
+	@echo ''
+	@echo 'You can do:'
+	@echo ' $$ make -C tools/ <tool>_install'
+	@echo ''
+	@echo '  from the top of the kernel source tree to build and install'
+	@echo '  one of the tools above'
+	@echo ''
+	@echo '  $$ make tools/install'
+	@echo ''
+	@echo '  installs all tools.'
+	@echo ''
+	@echo 'Cleaning targets:'
+	@echo ''
+	@echo '  all of the above with the "_clean" string appended cleans'
+	@echo '    the respective build directory.'
+	@echo '  clean: a summary clean target to clean _all_ folders'
+
+cpupower: FORCE
+	$(QUIET_SUBDIR0)power/$@/ $(QUIET_SUBDIR1)
+
+firewire lguest perf usb virtio vm: FORCE
+	$(QUIET_SUBDIR0)$@/ $(QUIET_SUBDIR1)
+
+selftests: FORCE
+	$(QUIET_SUBDIR0)testing/$@/ $(QUIET_SUBDIR1)
+
+turbostat x86_energy_perf_policy: FORCE
+	$(QUIET_SUBDIR0)power/x86/$@/ $(QUIET_SUBDIR1)
+
+cpupower_install:
+	$(QUIET_SUBDIR0)power/$(@:_install=)/ $(QUIET_SUBDIR1) install
+
+firewire_install lguest_install perf_install usb_install virtio_install vm_install:
+	$(QUIET_SUBDIR0)$(@:_install=)/ $(QUIET_SUBDIR1) install
+
+selftests_install:
+	$(QUIET_SUBDIR0)testing/$(@:_install=)/ $(QUIET_SUBDIR1) install
+
+turbostat_install x86_energy_perf_policy_install:
+	$(QUIET_SUBDIR0)power/x86/$(@:_install=)/ $(QUIET_SUBDIR1) install
+
+install: cpupower_install firewire_install lguest_install perf_install \
+		selftests_install turbostat_install usb_install virtio_install \
+		vm_install x86_energy_perf_policy_install
+
+cpupower_clean:
+	$(QUIET_SUBDIR0)power/cpupower/ $(QUIET_SUBDIR1) clean
+
+firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean:
+	$(QUIET_SUBDIR0)$(@:_clean=)/ $(QUIET_SUBDIR1) clean
+
+selftests_clean:
+	$(QUIET_SUBDIR0)testing/$(@:_clean=)/ $(QUIET_SUBDIR1) clean
+
+turbostat_clean x86_energy_perf_policy_clean:
+	$(QUIET_SUBDIR0)power/x86/$(@:_clean=)/ $(QUIET_SUBDIR1) clean
+
+clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \
+		turbostat_clean usb_clean virtio_clean vm_clean \
+		x86_energy_perf_policy_clean
+
+.PHONY: FORCE
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
index d144866..42c6fd2 100644
--- a/tools/perf/Documentation/perfconfig.example
+++ b/tools/perf/Documentation/perfconfig.example
@@ -6,6 +6,7 @@
 	normal = black, lightgray
 	selected = lightgray, magenta
 	code = blue, lightgray
+	addr = magenta, lightgray
 
 [tui]
 
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 92271d3..398094c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1,18 +1,10 @@
-ifeq ("$(origin O)", "command line")
-	OUTPUT := $(O)/
-endif
+include ../scripts/Makefile.include
 
 # The default target of this Makefile is...
 all:
 
 include config/utilities.mak
 
-ifneq ($(OUTPUT),)
-# check that the output directory actually exists
-OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
-$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
-endif
-
 # Define V to have a more verbose compile.
 #
 # Define O to save output files in a separate directory.
@@ -84,31 +76,6 @@
 	CFLAGS_WERROR := -Werror
 endif
 
-#
-# Include saner warnings here, which can catch bugs:
-#
-
-EXTRA_WARNINGS := -Wformat
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-security
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-y2k
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wshadow
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Winit-self
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wpacked
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wredundant-decls
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-aliasing=3
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-default
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-enum
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wno-system-headers
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wundef
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wwrite-strings
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wbad-function-cast
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-declarations
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-prototypes
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wnested-externs
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement
-
 ifeq ("$(origin DEBUG)", "command line")
   PERF_DEBUG = $(DEBUG)
 endif
@@ -333,6 +300,7 @@
 LIB_H += util/top.h
 LIB_H += $(ARCH_INCLUDE)
 LIB_H += util/cgroup.h
+LIB_H += util/target.h
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -394,6 +362,7 @@
 LIB_OBJS += $(OUTPUT)util/xyarray.o
 LIB_OBJS += $(OUTPUT)util/cpumap.o
 LIB_OBJS += $(OUTPUT)util/cgroup.o
+LIB_OBJS += $(OUTPUT)util/target.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 
@@ -506,22 +475,23 @@
 		# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
 		BASIC_CFLAGS += -I/usr/include/slang
 		EXTLIBS += -lnewt -lslang
-		LIB_OBJS += $(OUTPUT)util/ui/setup.o
-		LIB_OBJS += $(OUTPUT)util/ui/browser.o
-		LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o
-		LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o
-		LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o
-		LIB_OBJS += $(OUTPUT)util/ui/helpline.o
-		LIB_OBJS += $(OUTPUT)util/ui/progress.o
-		LIB_OBJS += $(OUTPUT)util/ui/util.o
-		LIB_H += util/ui/browser.h
-		LIB_H += util/ui/browsers/map.h
-		LIB_H += util/ui/helpline.h
-		LIB_H += util/ui/keysyms.h
-		LIB_H += util/ui/libslang.h
-		LIB_H += util/ui/progress.h
-		LIB_H += util/ui/util.h
-		LIB_H += util/ui/ui.h
+		LIB_OBJS += $(OUTPUT)ui/setup.o
+		LIB_OBJS += $(OUTPUT)ui/browser.o
+		LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
+		LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
+		LIB_OBJS += $(OUTPUT)ui/browsers/map.o
+		LIB_OBJS += $(OUTPUT)ui/helpline.o
+		LIB_OBJS += $(OUTPUT)ui/progress.o
+		LIB_OBJS += $(OUTPUT)ui/util.o
+		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+		LIB_H += ui/browser.h
+		LIB_H += ui/browsers/map.h
+		LIB_H += ui/helpline.h
+		LIB_H += ui/keysyms.h
+		LIB_H += ui/libslang.h
+		LIB_H += ui/progress.h
+		LIB_H += ui/util.h
+		LIB_H += ui/ui.h
 	endif
 endif
 
@@ -535,7 +505,12 @@
 	else
 		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0)
 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0)
-		LIB_OBJS += $(OUTPUT)util/gtk/browser.o
+		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
+		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
+		# Make sure that it is included only once.
+		ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),)
+			LIB_OBJS += $(OUTPUT)ui/setup.o
+		endif
 	endif
 endif
 
@@ -678,18 +653,6 @@
 	endif
 endif
 
-ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifndef V
-	QUIET_CC       = @echo '   ' CC $@;
-	QUIET_AR       = @echo '   ' AR $@;
-	QUIET_LINK     = @echo '   ' LINK $@;
-	QUIET_MKDIR    = @echo '   ' MKDIR $@;
-	QUIET_GEN      = @echo '   ' GEN $@;
-	QUIET_FLEX     = @echo '   ' FLEX $@;
-	QUIET_BISON    = @echo '   ' BISON $@;
-endif
-endif
-
 ifdef ASCIIDOC8
 	export ASCIIDOC8
 endif
@@ -800,16 +763,16 @@
 $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
-$(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS
+$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
 
-$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
+$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
 
-$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
+$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
 
-$(OUTPUT)util/ui/browsers/map.o: util/ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
+$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
 
 $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index be4e1ee..8a3dfac 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -44,7 +44,6 @@
 	struct perf_evlist	*evlist;
 	struct perf_session	*session;
 	const char		*progname;
-	const char		*uid_str;
 	int			output;
 	unsigned int		page_size;
 	int			realtime_prio;
@@ -218,7 +217,7 @@
 			if (err == EPERM || err == EACCES) {
 				ui__error_paranoid();
 				exit(EXIT_FAILURE);
-			} else if (err ==  ENODEV && opts->cpu_list) {
+			} else if (err ==  ENODEV && opts->target.cpu_list) {
 				die("No such device - did you specify"
 					" an out-of-range profile CPU?\n");
 			} else if (err == EINVAL) {
@@ -243,9 +242,13 @@
 			/*
 			 * If it's cycles then fall back to hrtimer
 			 * based cpu-clock-tick sw counter, which
-			 * is always available even if no PMU support:
+			 * is always available even if no PMU support.
+			 *
+			 * PPC returns ENXIO until 2.6.37 (behavior changed
+			 * with commit b0a873e).
 			 */
-			if (attr->type == PERF_TYPE_HARDWARE
+			if ((err == ENOENT || err == ENXIO)
+					&& attr->type == PERF_TYPE_HARDWARE
 					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 
 				if (verbose)
@@ -253,6 +256,10 @@
 						    "trying to fall back to cpu-clock-ticks\n");
 				attr->type = PERF_TYPE_SOFTWARE;
 				attr->config = PERF_COUNT_SW_CPU_CLOCK;
+				if (pos->name) {
+					free(pos->name);
+					pos->name = NULL;
+				}
 				goto try_again;
 			}
 
@@ -578,7 +585,7 @@
 		perf_session__process_machines(session, tool,
 					       perf_event__synthesize_guest_os);
 
-	if (!opts->system_wide)
+	if (!opts->target.system_wide)
 		perf_event__synthesize_thread_map(tool, evsel_list->threads,
 						  process_synthesized_event,
 						  machine);
@@ -747,6 +754,9 @@
 		.user_freq	     = UINT_MAX,
 		.user_interval	     = ULLONG_MAX,
 		.freq		     = 1000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
 	},
 	.write_mode = WRITE_FORCE,
 	.file_new   = true,
@@ -765,9 +775,9 @@
 		     parse_events_option),
 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 		     "event filter", parse_filter),
-	OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
+	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
 		    "record events on existing process id"),
-	OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
+	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
 		    "record events on existing thread id"),
 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
@@ -775,11 +785,11 @@
 		    "collect data without buffering"),
 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
 		    "collect raw sample records from all opened counters"),
-	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
 			    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('A', "append", &record.append_file,
 			    "append to the output file to do incremental profiling"),
-	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
 	OPT_BOOLEAN('f', "force", &record.force,
 			"overwrite existing data file (deprecated)"),
@@ -813,7 +823,8 @@
 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
-	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
+	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
+		   "user to profile"),
 
 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
 		     "branch any", "sample any taken branches",
@@ -831,6 +842,7 @@
 	struct perf_evsel *pos;
 	struct perf_evlist *evsel_list;
 	struct perf_record *rec = &record;
+	char errbuf[BUFSIZ];
 
 	perf_header__set_cmdline(argc, argv);
 
@@ -842,8 +854,7 @@
 
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
-	if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
-		!rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
+	if (!argc && perf_target__none(&rec->opts.target))
 		usage_with_options(record_usage, record_options);
 
 	if (rec->force && rec->append_file) {
@@ -856,7 +867,7 @@
 		rec->write_mode = WRITE_FORCE;
 	}
 
-	if (nr_cgroups && !rec->opts.system_wide) {
+	if (nr_cgroups && !rec->opts.target.system_wide) {
 		fprintf(stderr, "cgroup monitoring only available in"
 			" system-wide mode\n");
 		usage_with_options(record_usage, record_options);
@@ -883,17 +894,25 @@
 		goto out_symbol_exit;
 	}
 
-	rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
-					 rec->opts.target_pid);
-	if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
+	err = perf_target__validate(&rec->opts.target);
+	if (err) {
+		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+	}
+
+	err = perf_target__parse_uid(&rec->opts.target);
+	if (err) {
+		int saved_errno = errno;
+
+		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+
+		err = -saved_errno;
 		goto out_free_fd;
+	}
 
-	if (rec->opts.target_pid)
-		rec->opts.target_tid = rec->opts.target_pid;
-
-	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
-				     rec->opts.target_tid, rec->opts.uid,
-				     rec->opts.cpu_list) < 0)
+	err = -ENOMEM;
+	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
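
The uid/pid/tid/cpu handling above now funnels through the perf_target API: perf_target__validate() reconciles conflicting options (warn but continue), perf_target__parse_uid() resolves the uid string (a hard error), and both report through perf_target__strerror() into a caller-supplied buffer. A condensed sketch of that flow, with the field values purely illustrative:

	struct perf_target target = { .uid = UINT_MAX, .uid_str = "acme" };
	char errbuf[BUFSIZ];
	int err;

	err = perf_target__validate(&target);
	if (err)	/* e.g. both a pid and system-wide were given */
		perf_target__strerror(&target, err, errbuf, BUFSIZ);

	err = perf_target__parse_uid(&target);	/* fills target.uid */
	if (err)
		perf_target__strerror(&target, err, errbuf, BUFSIZ);
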
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index cdae9b2..d58e414 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -296,12 +296,15 @@
 {
 	size_t ret;
 	char unit;
-	unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
+	unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = self->stats.total_period;
 
-	nr_events = convert_unit(nr_events, &unit);
-	ret = fprintf(fp, "# Events: %lu%c", nr_events, unit);
+	nr_samples = convert_unit(nr_samples, &unit);
+	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
 	if (evname != NULL)
-		ret += fprintf(fp, " %s", evname);
+		ret += fprintf(fp, " of event '%s'", evname);
+
+	ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
 	return ret + fprintf(fp, "\n#\n");
 }
 
@@ -680,14 +683,10 @@
 
 	}
 
-	if (strcmp(report.input_name, "-") != 0) {
-		if (report.use_gtk)
-			perf_gtk_setup_browser(argc, argv, true);
-		else
-			setup_browser(true);
-	} else {
+	if (strcmp(report.input_name, "-") != 0)
+		setup_browser(true);
+	else
 		use_browser = 0;
-	}
 
 	/*
 	 * Only in the newt browser we are doing integrated annotation,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1e5e9b2..62ae30d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -173,24 +173,23 @@
 
 
 
-struct perf_evlist		*evsel_list;
+static struct perf_evlist	*evsel_list;
 
-static bool			system_wide			=  false;
+static struct perf_target	target = {
+	.uid	= UINT_MAX,
+};
+
 static int			run_idx				=  0;
-
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
 static bool			no_aggr				= false;
-static const char		*target_pid;
-static const char		*target_tid;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			sync_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
-static const char		*cpu_list;
 static const char		*csv_sep			= NULL;
 static bool			csv_output			= false;
 static bool			group				= false;
@@ -265,18 +264,18 @@
 	return sqrt(variance_mean);
 }
 
-struct stats			runtime_nsecs_stats[MAX_NR_CPUS];
-struct stats			runtime_cycles_stats[MAX_NR_CPUS];
-struct stats			runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
-struct stats			runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
-struct stats			runtime_branches_stats[MAX_NR_CPUS];
-struct stats			runtime_cacherefs_stats[MAX_NR_CPUS];
-struct stats			runtime_l1_dcache_stats[MAX_NR_CPUS];
-struct stats			runtime_l1_icache_stats[MAX_NR_CPUS];
-struct stats			runtime_ll_cache_stats[MAX_NR_CPUS];
-struct stats			runtime_itlb_cache_stats[MAX_NR_CPUS];
-struct stats			runtime_dtlb_cache_stats[MAX_NR_CPUS];
-struct stats			walltime_nsecs_stats;
+static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
+static struct stats runtime_branches_stats[MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats walltime_nsecs_stats;
 
 static int create_perf_stat_counter(struct perf_evsel *evsel,
 				    struct perf_evsel *first)
@@ -299,15 +298,15 @@
 	if (exclude_guest_missing)
 		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
 
-	if (system_wide) {
+	if (perf_target__has_cpu(&target)) {
 		ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
-						group, group_fd);
+					       group, group_fd);
 		if (ret)
 			goto check_ret;
 		return 0;
 	}
 
-	if (!target_pid && !target_tid && (!group || evsel == first)) {
+	if (!perf_target__has_task(&target) && (!group || evsel == first)) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
@@ -471,7 +470,7 @@
 			exit(-1);
 		}
 
-		if (!target_tid && !target_pid && !system_wide)
+		if (perf_target__none(&target))
 			evsel_list->threads->map[0] = child_pid;
 
 		/*
@@ -506,7 +505,7 @@
 				error("You may not have permission to collect %sstats.\n"
 				      "\t Consider tweaking"
 				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
-				      system_wide ? "system-wide " : "");
+				      target.system_wide ? "system-wide " : "");
 			} else {
 				error("open_counter returned with %d (%s). "
 				      "/bin/dmesg may provide additional information.\n",
@@ -998,14 +997,14 @@
 	if (!csv_output) {
 		fprintf(output, "\n");
 		fprintf(output, " Performance counter stats for ");
-		if (!target_pid && !target_tid) {
+		if (!perf_target__has_task(&target)) {
 			fprintf(output, "\'%s", argv[0]);
 			for (i = 1; i < argc; i++)
 				fprintf(output, " %s", argv[i]);
-		} else if (target_pid)
-			fprintf(output, "process id \'%s", target_pid);
+		} else if (target.pid)
+			fprintf(output, "process id \'%s", target.pid);
 		else
-			fprintf(output, "thread id \'%s", target_tid);
+			fprintf(output, "thread id \'%s", target.tid);
 
 		fprintf(output, "\'");
 		if (run_count > 1)
@@ -1079,11 +1078,11 @@
 		     "event filter", parse_filter),
 	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
 		    "child tasks do not inherit counters"),
-	OPT_STRING('p', "pid", &target_pid, "pid",
+	OPT_STRING('p', "pid", &target.pid, "pid",
 		   "stat events on existing process id"),
-	OPT_STRING('t', "tid", &target_tid, "tid",
+	OPT_STRING('t', "tid", &target.tid, "tid",
 		   "stat events on existing thread id"),
-	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
 		    "put the counters into a counter group"),
@@ -1102,7 +1101,7 @@
 	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
 			   "print large numbers with thousands\' separators",
 			   stat__set_big_num),
-	OPT_STRING('C', "cpu", &cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
 	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
 		    "disable CPU count aggregation"),
@@ -1220,13 +1219,13 @@
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		big_num = false;
 
-	if (!argc && !target_pid && !target_tid)
+	if (!argc && !perf_target__has_task(&target))
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0)
 		usage_with_options(stat_usage, options);
 
 	/* no_aggr, cgroup are for system-wide only */
-	if ((no_aggr || nr_cgroups) && !system_wide) {
+	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
 
@@ -1236,23 +1235,14 @@
 	if (add_default_attributes())
 		goto out;
 
-	if (target_pid)
-		target_tid = target_pid;
+	perf_target__validate(&target);
 
-	evsel_list->threads = thread_map__new_str(target_pid,
-						  target_tid, UINT_MAX);
-	if (evsel_list->threads == NULL) {
-		pr_err("Problems finding threads of monitor\n");
-		usage_with_options(stat_usage, options);
-	}
+	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
+		if (perf_target__has_task(&target))
+			pr_err("Problems finding threads of monitor\n");
+		if (perf_target__has_cpu(&target))
+			perror("failed to parse CPUs map");
 
-	if (system_wide)
-		evsel_list->cpus = cpu_map__new(cpu_list);
-	else
-		evsel_list->cpus = cpu_map__dummy_new();
-
-	if (evsel_list->cpus == NULL) {
-		perror("failed to parse CPUs map");
 		usage_with_options(stat_usage, options);
 		return -1;
 	}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 223ffdc..6c47376 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -1195,6 +1195,10 @@
 static int test__PERF_RECORD(void)
 {
 	struct perf_record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
 		.no_delay   = true,
 		.freq	    = 10,
 		.mmap_pages = 256,
@@ -1237,8 +1241,7 @@
 	 * perf_evlist__prepare_workload we'll fill in the only thread
 	 * we're monitoring, the one forked there.
 	 */
-	err = perf_evlist__create_maps(evlist, opts.target_pid,
-				       opts.target_tid, UINT_MAX, opts.cpu_list);
+	err = perf_evlist__create_maps(evlist, &opts.target);
 	if (err < 0) {
 		pr_debug("Not enough memory to create thread/cpu maps\n");
 		goto out_delete_evlist;
@@ -1579,8 +1582,6 @@
 	sa.sa_sigaction = segfault_handler;
 	sigaction(SIGSEGV, &sa, NULL);
 
-	fprintf(stderr, "\n\n");
-
 	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 	if (fd < 0) {
 		die("Error: sys_perf_event_open() syscall returned "
@@ -1605,7 +1606,7 @@
 		loops *= 10;
 
 		delta = now - stamp;
-		fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+		pr_debug("%14d: %14Lu\n", n, (long long)delta);
 
 		delta_sum += delta;
 	}
@@ -1613,7 +1614,7 @@
 	munmap(addr, page_size);
 	close(fd);
 
-	fprintf(stderr, "   ");
+	pr_debug("   ");
 
 	if (!delta_sum)
 		return -1;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8ef59f8..3e981a7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -588,7 +588,7 @@
 	 * via --uid.
 	 */
 	list_for_each_entry(pos, &top->evlist->entries, node)
-		pos->hists.uid_filter_str = top->uid_str;
+		pos->hists.uid_filter_str = top->target.uid_str;
 
 	perf_evlist__tui_browse_hists(top->evlist, help,
 				      perf_top__sort_new_samples,
@@ -948,6 +948,10 @@
 
 				attr->type = PERF_TYPE_SOFTWARE;
 				attr->config = PERF_COUNT_SW_CPU_CLOCK;
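+				/* the event was just swapped for cpu-clock, so refresh its cached name */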
+				if (counter->name) {
+					free(counter->name);
+					counter->name = strdup(event_name(counter));
+				}
 				goto try_again;
 			}
 
@@ -1016,7 +1020,7 @@
 	if (ret)
 		goto out_delete;
 
-	if (top->target_tid || top->uid != UINT_MAX)
+	if (perf_target__has_task(&top->target))
 		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
 						  perf_event__process,
 						  &top->session->host_machine);
@@ -1150,14 +1154,17 @@
 int cmd_top(int argc, const char **argv, const char *prefix __used)
 {
 	struct perf_evsel *pos;
-	int status = -ENOMEM;
+	int status;
+	char errbuf[BUFSIZ];
 	struct perf_top top = {
 		.count_filter	     = 5,
 		.delay_secs	     = 2,
-		.uid		     = UINT_MAX,
 		.freq		     = 1000, /* 1 KHz */
 		.mmap_pages	     = 128,
 		.sym_pcnt_filter     = 5,
+		.target		     = {
+			.uses_mmap   = true,
+		},
 	};
 	char callchain_default_opt[] = "fractal,0.5,callee";
 	const struct option options[] = {
@@ -1166,13 +1173,13 @@
 		     parse_events_option),
 	OPT_INTEGER('c', "count", &top.default_interval,
 		    "event period to sample"),
-	OPT_STRING('p', "pid", &top.target_pid, "pid",
+	OPT_STRING('p', "pid", &top.target.pid, "pid",
 		    "profile events on existing process id"),
-	OPT_STRING('t', "tid", &top.target_tid, "tid",
+	OPT_STRING('t', "tid", &top.target.tid, "tid",
 		    "profile events on existing thread id"),
-	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
 			    "system-wide collection from all CPUs"),
-	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
@@ -1227,7 +1234,7 @@
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
+	OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
 	OPT_END()
 	};
 
@@ -1253,22 +1260,27 @@
 
 	setup_browser(false);
 
-	top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
-	if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
-		goto out_delete_evlist;
-
-	/* CPU and PID are mutually exclusive */
-	if (top.target_tid && top.cpu_list) {
-		printf("WARNING: PID switch overriding CPU\n");
-		sleep(1);
-		top.cpu_list = NULL;
+	status = perf_target__validate(&top.target);
+	if (status) {
+		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
 	}
 
-	if (top.target_pid)
-		top.target_tid = top.target_pid;
+	status = perf_target__parse_uid(&top.target);
+	if (status) {
+		int saved_errno = errno;
 
-	if (perf_evlist__create_maps(top.evlist, top.target_pid,
-				     top.target_tid, top.uid, top.cpu_list) < 0)
+		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+
+		status = -saved_errno;
+		goto out_delete_evlist;
+	}
+
+	if (perf_target__none(&top.target))
+		top.target.system_wide = true;
+
+	if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
 		usage_with_options(top_usage, options);
 
 	if (!top.evlist->nr_entries &&
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 89e3355..14f1034 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -207,10 +207,10 @@
 
 void pthread__unblock_sigwinch(void);
 
+#include "util/target.h"
+
 struct perf_record_opts {
-	const char   *target_pid;
-	const char   *target_tid;
-	uid_t	     uid;
+	struct perf_target target;
 	bool	     call_graph;
 	bool	     group;
 	bool	     inherit_stat;
@@ -223,7 +223,6 @@
 	bool	     sample_time;
 	bool	     sample_id_all_missing;
 	bool	     exclude_guest_missing;
-	bool	     system_wide;
 	bool	     period;
 	unsigned int freq;
 	unsigned int mmap_pages;
@@ -231,7 +230,6 @@
 	int	     branch_stack;
 	u64	     default_interval;
 	u64	     user_interval;
-	const char   *cpu_list;
 };
 
 #endif
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/ui/browser.c
similarity index 81%
rename from tools/perf/util/ui/browser.c
rename to tools/perf/ui/browser.c
index 5568291..cde4d0f 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -27,9 +27,12 @@
 	return HE_COLORSET_NORMAL;
 }
 
-void ui_browser__set_color(struct ui_browser *self __used, int color)
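+/* Sets the color and returns the previously active one, so callers can restore it */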
+int ui_browser__set_color(struct ui_browser *browser, int color)
 {
+	int ret = browser->current_color;
+	browser->current_color = color;
 	SLsmg_set_color(color);
+	return ret;
 }
 
 void ui_browser__set_percent_color(struct ui_browser *self,
@@ -503,6 +506,12 @@
 		.bg	  = "default",
 	},
 	{
+		.colorset = HE_COLORSET_ADDR,
+		.name	  = "addr",
+		.fg	  = "magenta",
+		.bg	  = "default",
+	},
+	{
 		.name = NULL,
 	}
 };
@@ -584,6 +593,111 @@
 	return row;
 }
 
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+			 u16 start, u16 end)
+{
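+	/* charset 1 selects SLang's alternate (line drawing) characters */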
+	SLsmg_set_char_set(1);
+	ui_browser__gotorc(browser, start, column);
+	SLsmg_draw_vline(end - start + 1);
+	SLsmg_set_char_set(0);
+}
+
+void ui_browser__write_graph(struct ui_browser *browser __used, int graph)
+{
+	SLsmg_set_char_set(1);
+	SLsmg_write_char(graph);
+	SLsmg_set_char_set(0);
+}
+
+static void __ui_browser__line_arrow_up(struct ui_browser *browser,
+					unsigned int column,
+					u64 start, u64 end)
+{
+	unsigned int row, end_row;
+
+	SLsmg_set_char_set(1);
+
+	if (start < browser->top_idx + browser->height) {
+		row = start - browser->top_idx;
+		ui_browser__gotorc(browser, row, column);
+		SLsmg_write_char(SLSMG_LLCORN_CHAR);
+		ui_browser__gotorc(browser, row, column + 1);
+		SLsmg_draw_hline(2);
+
+		if (row-- == 0)
+			goto out;
+	} else
+		row = browser->height - 1;
+
+	if (end > browser->top_idx)
+		end_row = end - browser->top_idx;
+	else
+		end_row = 0;
+
+	ui_browser__gotorc(browser, end_row, column);
+	SLsmg_draw_vline(row - end_row + 1);
+
+	ui_browser__gotorc(browser, end_row, column);
+	if (end >= browser->top_idx) {
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 1);
+		SLsmg_write_char(SLSMG_HLINE_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 2);
+		SLsmg_write_char(SLSMG_RARROW_CHAR);
+	}
+out:
+	SLsmg_set_char_set(0);
+}
+
+static void __ui_browser__line_arrow_down(struct ui_browser *browser,
+					  unsigned int column,
+					  u64 start, u64 end)
+{
+	unsigned int row, end_row;
+
+	SLsmg_set_char_set(1);
+
+	if (start >= browser->top_idx) {
+		row = start - browser->top_idx;
+		ui_browser__gotorc(browser, row, column);
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, row, column + 1);
+		SLsmg_draw_hline(2);
+
+		if (row++ == 0)
+			goto out;
+	} else
+		row = 0;
+
+	if (end >= browser->top_idx + browser->height)
+		end_row = browser->height - 1;
+	else
+		end_row = end - browser->top_idx;
+
+	ui_browser__gotorc(browser, row, column);
+	SLsmg_draw_vline(end_row - row + 1);
+
+	ui_browser__gotorc(browser, end_row, column);
+	if (end < browser->top_idx + browser->height) {
+		SLsmg_write_char(SLSMG_LLCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 1);
+		SLsmg_write_char(SLSMG_HLINE_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 2);
+		SLsmg_write_char(SLSMG_RARROW_CHAR);
+	}
+out:
+	SLsmg_set_char_set(0);
+}
+
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+			      u64 start, u64 end)
+{
+	if (start > end)
+		__ui_browser__line_arrow_up(browser, column, start, end);
+	else
+		__ui_browser__line_arrow_down(browser, column, start, end);
+}
+
 void ui_browser__init(void)
 {
 	int i = 0;
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/ui/browser.h
similarity index 86%
rename from tools/perf/util/ui/browser.h
rename to tools/perf/ui/browser.h
index 6ee82f6..dd96d82 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -10,11 +10,13 @@
 #define HE_COLORSET_NORMAL	52
 #define HE_COLORSET_SELECTED	53
 #define HE_COLORSET_CODE	54
+#define HE_COLORSET_ADDR	55
 
 struct ui_browser {
 	u64	      index, top_idx;
 	void	      *top, *entries;
 	u16	      y, x, width, height;
+	int	      current_color;
 	void	      *priv;
 	const char    *title;
 	char	      *helpline;
@@ -27,7 +29,7 @@
 	bool	      use_navkeypressed;
 };
 
-void ui_browser__set_color(struct ui_browser *self, int color);
+int  ui_browser__set_color(struct ui_browser *browser, int color);
 void ui_browser__set_percent_color(struct ui_browser *self,
 				   double percent, bool current);
 bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row);
@@ -35,6 +37,9 @@
 void ui_browser__reset_index(struct ui_browser *self);
 
 void ui_browser__gotorc(struct ui_browser *self, int y, int x);
+void ui_browser__write_graph(struct ui_browser *browser, int graph);
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+			      u64 start, u64 end);
 void __ui_browser__show_title(struct ui_browser *browser, const char *title);
 void ui_browser__show_title(struct ui_browser *browser, const char *title);
 int ui_browser__show(struct ui_browser *self, const char *title,
@@ -44,6 +49,8 @@
 int ui_browser__run(struct ui_browser *browser, int delay_secs);
 void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries);
 void ui_browser__handle_resize(struct ui_browser *browser);
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+			 u16 start, u16 end);
 
 int ui_browser__warning(struct ui_browser *browser, int timeout,
 			const char *format, ...);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
new file mode 100644
index 0000000..6e0ef79
--- /dev/null
+++ b/tools/perf/ui/browsers/annotate.c
@@ -0,0 +1,867 @@
+#include "../../util/util.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../libslang.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../../util/annotate.h"
+#include "../../util/hist.h"
+#include "../../util/sort.h"
+#include "../../util/symbol.h"
+#include <pthread.h>
+#include <newt.h>
+
+struct browser_disasm_line {
+	struct rb_node	rb_node;
+	double		percent;
+	u32		idx;
+	int		idx_asm;
+	int		jump_sources;
+};
+
+struct annotate_browser {
+	struct ui_browser b;
+	struct rb_root	  entries;
+	struct rb_node	  *curr_hot;
+	struct disasm_line	  *selection;
+	struct disasm_line  **offsets;
+	u64		    start;
+	int		    nr_asm_entries;
+	int		    nr_entries;
+	int		    max_jump_sources;
+	int		    nr_jumps;
+	bool		    hide_src_code;
+	bool		    use_offset;
+	bool		    jump_arrows;
+	bool		    show_nr_jumps;
+	bool		    searching_backwards;
+	u8		    addr_width;
+	u8		    jumps_width;
+	u8		    target_width;
+	u8		    min_addr_width;
+	u8		    max_addr_width;
+	char		    search_bf[128];
+};
+
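+/*
+ * The lines for this browser are allocated with room for its private state
+ * right after each disasm_line (privsize == sizeof(struct browser_disasm_line)
+ * in symbol__tui_annotate), so "dl + 1" points at that state.
+ */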
+static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl)
+{
+	return (struct browser_disasm_line *)(dl + 1);
+}
+
+static bool disasm_line__filter(struct ui_browser *browser, void *entry)
+{
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+
+	if (ab->hide_src_code) {
+		struct disasm_line *dl = list_entry(entry, struct disasm_line, node);
+		return dl->offset == -1;
+	}
+
+	return false;
+}
+
+static int annotate_browser__jumps_percent_color(struct annotate_browser *browser,
+						 int nr, bool current)
+{
+	if (current && (!browser->b.use_navkeypressed || browser->b.navkeypressed))
+		return HE_COLORSET_SELECTED;
+	if (nr == browser->max_jump_sources)
+		return HE_COLORSET_TOP;
+	if (nr > 1)
+		return HE_COLORSET_MEDIUM;
+	return HE_COLORSET_NORMAL;
+}
+
+static int annotate_browser__set_jumps_percent_color(struct annotate_browser *browser,
+						     int nr, bool current)
+{
+	 int color = annotate_browser__jumps_percent_color(browser, nr, current);
+	 return ui_browser__set_color(&browser->b, color);
+}
+
+static void annotate_browser__write(struct ui_browser *self, void *entry, int row)
+{
+	struct annotate_browser *ab = container_of(self, struct annotate_browser, b);
+	struct disasm_line *dl = list_entry(entry, struct disasm_line, node);
+	struct browser_disasm_line *bdl = disasm_line__browser(dl);
+	bool current_entry = ui_browser__is_current_entry(self, row);
+	bool change_color = (!ab->hide_src_code &&
+			     (!current_entry || (self->use_navkeypressed &&
+					         !self->navkeypressed)));
+	int width = self->width, printed;
+	char bf[256];
+
+	if (dl->offset != -1 && bdl->percent != 0.0) {
+		ui_browser__set_percent_color(self, bdl->percent, current_entry);
+		slsmg_printf("%6.2f ", bdl->percent);
+	} else {
+		ui_browser__set_percent_color(self, 0, current_entry);
+		slsmg_write_nstring(" ", 7);
+	}
+
+	SLsmg_write_char(' ');
+
+	/* The scroll bar isn't being used */
+	if (!self->navkeypressed)
+		width += 1;
+
+	if (!*dl->line)
+		slsmg_write_nstring(" ", width - 7);
+	else if (dl->offset == -1) {
+		printed = scnprintf(bf, sizeof(bf), "%*s  ",
+				    ab->addr_width, " ");
+		slsmg_write_nstring(bf, printed);
+		slsmg_write_nstring(dl->line, width - printed - 6);
+	} else {
+		u64 addr = dl->offset;
+		int color = -1;
+
+		if (!ab->use_offset)
+			addr += ab->start;
+
+		if (!ab->use_offset) {
+			printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
+		} else {
+			if (bdl->jump_sources) {
+				if (ab->show_nr_jumps) {
+					int prev;
+					printed = scnprintf(bf, sizeof(bf), "%*d ",
+							    ab->jumps_width,
+							    bdl->jump_sources);
+					prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources,
+											 current_entry);
+					slsmg_write_nstring(bf, printed);
+					ui_browser__set_color(self, prev);
+				}
+
+				printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+						    ab->target_width, addr);
+			} else {
+				printed = scnprintf(bf, sizeof(bf), "%*s  ",
+						    ab->addr_width, " ");
+			}
+		}
+
+		if (change_color)
+			color = ui_browser__set_color(self, HE_COLORSET_ADDR);
+		slsmg_write_nstring(bf, printed);
+		if (change_color)
+			ui_browser__set_color(self, color);
+		if (dl->ins && dl->ins->ops->scnprintf) {
+			if (ins__is_jump(dl->ins)) {
+				bool fwd = dl->ops.target.offset > (u64)dl->offset;
+
+				ui_browser__write_graph(self, fwd ? SLSMG_DARROW_CHAR :
+								    SLSMG_UARROW_CHAR);
+				SLsmg_write_char(' ');
+			} else if (ins__is_call(dl->ins)) {
+				ui_browser__write_graph(self, SLSMG_RARROW_CHAR);
+				SLsmg_write_char(' ');
+			} else {
+				slsmg_write_nstring(" ", 2);
+			}
+		} else {
+			if (strcmp(dl->name, "retq")) {
+				slsmg_write_nstring(" ", 2);
+			} else {
+				ui_browser__write_graph(self, SLSMG_LARROW_CHAR);
+				SLsmg_write_char(' ');
+			}
+		}
+
+		disasm_line__scnprintf(dl, bf, sizeof(bf), !ab->use_offset);
+		slsmg_write_nstring(bf, width - 10 - printed);
+	}
+
+	if (current_entry)
+		ab->selection = dl;
+}
+
+static void annotate_browser__draw_current_jump(struct ui_browser *browser)
+{
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+	struct disasm_line *cursor = ab->selection, *target;
+	struct browser_disasm_line *btarget, *bcursor;
+	unsigned int from, to;
+
+	if (!cursor->ins || !ins__is_jump(cursor->ins) ||
+	    !disasm_line__has_offset(cursor))
+		return;
+
+	target = ab->offsets[cursor->ops.target.offset];
+	if (!target)
+		return;
+
+	bcursor = disasm_line__browser(cursor);
+	btarget = disasm_line__browser(target);
+
+	if (ab->hide_src_code) {
+		from = bcursor->idx_asm;
+		to = btarget->idx_asm;
+	} else {
+		from = (u64)bcursor->idx;
+		to = (u64)btarget->idx;
+	}
+
+	ui_browser__set_color(browser, HE_COLORSET_CODE);
+	__ui_browser__line_arrow(browser, 9 + ab->addr_width, from, to);
+}
+
+static unsigned int annotate_browser__refresh(struct ui_browser *browser)
+{
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+	int ret = ui_browser__list_head_refresh(browser);
+
+	if (ab->jump_arrows)
+		annotate_browser__draw_current_jump(browser);
+
+	ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+	__ui_browser__vline(browser, 7, 0, browser->height - 1);
+	return ret;
+}
+
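+/*
+ * A line covers every address from its offset up to the next line that has
+ * one, so accumulate the hits (or precomputed percentages) over that range.
+ */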
+static double disasm_line__calc_percent(struct disasm_line *dl, struct symbol *sym, int evidx)
+{
+	double percent = 0.0;
+
+	if (dl->offset != -1) {
+		int len = sym->end - sym->start;
+		unsigned int hits = 0;
+		struct annotation *notes = symbol__annotation(sym);
+		struct source_line *src_line = notes->src->lines;
+		struct sym_hist *h = annotation__histogram(notes, evidx);
+		s64 offset = dl->offset;
+		struct disasm_line *next;
+
+		next = disasm__get_next_ip_line(&notes->src->source, dl);
+		while (offset < (s64)len &&
+		       (next == NULL || offset < next->offset)) {
+			if (src_line) {
+				percent += src_line[offset].percent;
+			} else
+				hits += h->addr[offset];
+
+			++offset;
+		}
+		/*
+		 * If the percentage wasn't already calculated in
+		 * symbol__get_source_line, do it now:
+		 */
+		if (src_line == NULL && h->sum)
+			percent = 100.0 * hits / h->sum;
+	}
+
+	return percent;
+}
+
+static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct browser_disasm_line *l;
+
+	while (*p != NULL) {
+		parent = *p;
+		l = rb_entry(parent, struct browser_disasm_line, rb_node);
+		if (bdl->percent < l->percent)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&bdl->rb_node, parent, p);
+	rb_insert_color(&bdl->rb_node, root);
+}
+
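+/* Select pos, scrolling back so that it ends up roughly mid-screen */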
+static void annotate_browser__set_top(struct annotate_browser *self,
+				      struct disasm_line *pos, u32 idx)
+{
+	unsigned back;
+
+	ui_browser__refresh_dimensions(&self->b);
+	back = self->b.height / 2;
+	self->b.top_idx = self->b.index = idx;
+
+	while (self->b.top_idx != 0 && back != 0) {
+		pos = list_entry(pos->node.prev, struct disasm_line, node);
+
+		if (disasm_line__filter(&self->b, &pos->node))
+			continue;
+
+		--self->b.top_idx;
+		--back;
+	}
+
+	self->b.top = pos;
+	self->b.navkeypressed = true;
+}
+
+static void annotate_browser__set_rb_top(struct annotate_browser *browser,
+					 struct rb_node *nd)
+{
+	struct browser_disasm_line *bpos;
+	struct disasm_line *pos;
+
+	bpos = rb_entry(nd, struct browser_disasm_line, rb_node);
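+	/* the disasm_line sits immediately before its browser private area */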
+	pos = ((struct disasm_line *)bpos) - 1;
+	annotate_browser__set_top(browser, pos, bpos->idx);
+	browser->curr_hot = nd;
+}
+
+static void annotate_browser__calc_percent(struct annotate_browser *browser,
+					   int evidx)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *pos;
+
+	browser->entries = RB_ROOT;
+
+	pthread_mutex_lock(&notes->lock);
+
+	list_for_each_entry(pos, &notes->src->source, node) {
+		struct browser_disasm_line *bpos = disasm_line__browser(pos);
+		bpos->percent = disasm_line__calc_percent(pos, sym, evidx);
+		if (bpos->percent < 0.01) {
+			RB_CLEAR_NODE(&bpos->rb_node);
+			continue;
+		}
+		disasm_rb_tree__insert(&browser->entries, bpos);
+	}
+	pthread_mutex_unlock(&notes->lock);
+
+	browser->curr_hot = rb_last(&browser->entries);
+}
+
+static bool annotate_browser__toggle_source(struct annotate_browser *browser)
+{
+	struct disasm_line *dl;
+	struct browser_disasm_line *bdl;
+	off_t offset = browser->b.index - browser->b.top_idx;
+
+	browser->b.seek(&browser->b, offset, SEEK_CUR);
+	dl = list_entry(browser->b.top, struct disasm_line, node);
+	bdl = disasm_line__browser(dl);
+
+	if (browser->hide_src_code) {
+		if (bdl->idx_asm < offset)
+			offset = bdl->idx;
+
+		browser->b.nr_entries = browser->nr_entries;
+		browser->hide_src_code = false;
+		browser->b.seek(&browser->b, -offset, SEEK_CUR);
+		browser->b.top_idx = bdl->idx - offset;
+		browser->b.index = bdl->idx;
+	} else {
+		if (bdl->idx_asm < 0) {
+			ui_helpline__puts("Only available for assembly lines.");
+			browser->b.seek(&browser->b, -offset, SEEK_CUR);
+			return false;
+		}
+
+		if (bdl->idx_asm < offset)
+			offset = bdl->idx_asm;
+
+		browser->b.nr_entries = browser->nr_asm_entries;
+		browser->hide_src_code = true;
+		browser->b.seek(&browser->b, -offset, SEEK_CUR);
+		browser->b.top_idx = bdl->idx_asm - offset;
+		browser->b.index = bdl->idx_asm;
+	}
+
+	return true;
+}
+
+static bool annotate_browser__callq(struct annotate_browser *browser,
+				    int evidx, void (*timer)(void *arg),
+				    void *arg, int delay_secs)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct disasm_line *dl = browser->selection;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes;
+	struct symbol *target;
+	u64 ip;
+
+	if (!ins__is_call(dl->ins))
+		return false;
+
+	ip = ms->map->map_ip(ms->map, dl->ops.target.addr);
+	target = map__find_symbol(ms->map, ip, NULL);
+	if (target == NULL) {
+		ui_helpline__puts("The called function was not found.");
+		return true;
+	}
+
+	notes = symbol__annotation(target);
+	pthread_mutex_lock(&notes->lock);
+
+	if (notes->src == NULL && symbol__alloc_hist(target) < 0) {
+		pthread_mutex_unlock(&notes->lock);
+		ui__warning("Not enough memory for annotating '%s' symbol!\n",
+			    target->name);
+		return true;
+	}
+
+	pthread_mutex_unlock(&notes->lock);
+	symbol__tui_annotate(target, ms->map, evidx, timer, arg, delay_secs);
+	ui_browser__show_title(&browser->b, sym->name);
+	return true;
+}
+
+static
+struct disasm_line *annotate_browser__find_offset(struct annotate_browser *browser,
+					  s64 offset, s64 *idx)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *pos;
+
+	*idx = 0;
+	list_for_each_entry(pos, &notes->src->source, node) {
+		if (pos->offset == offset)
+			return pos;
+		if (!disasm_line__filter(&browser->b, &pos->node))
+			++*idx;
+	}
+
+	return NULL;
+}
+
+static bool annotate_browser__jump(struct annotate_browser *browser)
+{
+	struct disasm_line *dl = browser->selection;
+	s64 idx;
+
+	if (!ins__is_jump(dl->ins))
+		return false;
+
+	dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx);
+	if (dl == NULL) {
+		ui_helpline__puts("Invallid jump offset");
+		return true;
+	}
+
+	annotate_browser__set_top(browser, dl, idx);
+
+	return true;
+}
+
+static
+struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser,
+					  char *s, s64 *idx)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *pos = browser->selection;
+
+	*idx = browser->b.index;
+	list_for_each_entry_continue(pos, &notes->src->source, node) {
+		if (disasm_line__filter(&browser->b, &pos->node))
+			continue;
+
+		++*idx;
+
+		if (pos->line && strstr(pos->line, s) != NULL)
+			return pos;
+	}
+
+	return NULL;
+}
+
+static bool __annotate_browser__search(struct annotate_browser *browser)
+{
+	struct disasm_line *dl;
+	s64 idx;
+
+	dl = annotate_browser__find_string(browser, browser->search_bf, &idx);
+	if (dl == NULL) {
+		ui_helpline__puts("String not found!");
+		return false;
+	}
+
+	annotate_browser__set_top(browser, dl, idx);
+	browser->searching_backwards = false;
+	return true;
+}
+
+static
+struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser,
+						  char *s, s64 *idx)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *pos = browser->selection;
+
+	*idx = browser->b.index;
+	list_for_each_entry_continue_reverse(pos, &notes->src->source, node) {
+		if (disasm_line__filter(&browser->b, &pos->node))
+			continue;
+
+		--*idx;
+
+		if (pos->line && strstr(pos->line, s) != NULL)
+			return pos;
+	}
+
+	return NULL;
+}
+
+static bool __annotate_browser__search_reverse(struct annotate_browser *browser)
+{
+	struct disasm_line *dl;
+	s64 idx;
+
+	dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx);
+	if (dl == NULL) {
+		ui_helpline__puts("String not found!");
+		return false;
+	}
+
+	annotate_browser__set_top(browser, dl, idx);
+	browser->searching_backwards = true;
+	return true;
+}
+
+static bool annotate_browser__search_window(struct annotate_browser *browser,
+					    int delay_secs)
+{
+	if (ui_browser__input_window("Search", "String: ", browser->search_bf,
+				     "ENTER: OK, ESC: Cancel",
+				     delay_secs * 2) != K_ENTER ||
+	    !*browser->search_bf)
+		return false;
+
+	return true;
+}
+
+static bool annotate_browser__search(struct annotate_browser *browser, int delay_secs)
+{
+	if (annotate_browser__search_window(browser, delay_secs))
+		return __annotate_browser__search(browser);
+
+	return false;
+}
+
+static bool annotate_browser__continue_search(struct annotate_browser *browser,
+					      int delay_secs)
+{
+	if (!*browser->search_bf)
+		return annotate_browser__search(browser, delay_secs);
+
+	return __annotate_browser__search(browser);
+}
+
+static bool annotate_browser__search_reverse(struct annotate_browser *browser,
+					   int delay_secs)
+{
+	if (annotate_browser__search_window(browser, delay_secs))
+		return __annotate_browser__search_reverse(browser);
+
+	return false;
+}
+
+static
+bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
+					       int delay_secs)
+{
+	if (!*browser->search_bf)
+		return annotate_browser__search_reverse(browser, delay_secs);
+
+	return __annotate_browser__search_reverse(browser);
+}
+
+static int annotate_browser__run(struct annotate_browser *self, int evidx,
+				 void(*timer)(void *arg),
+				 void *arg, int delay_secs)
+{
+	struct rb_node *nd = NULL;
+	struct map_symbol *ms = self->b.priv;
+	struct symbol *sym = ms->sym;
+	const char *help = "Press 'h' for help on key bindings";
+	int key;
+
+	if (ui_browser__show(&self->b, sym->name, help) < 0)
+		return -1;
+
+	annotate_browser__calc_percent(self, evidx);
+
+	if (self->curr_hot) {
+		annotate_browser__set_rb_top(self, self->curr_hot);
+		self->b.navkeypressed = false;
+	}
+
+	nd = self->curr_hot;
+
+	while (1) {
+		key = ui_browser__run(&self->b, delay_secs);
+
+		if (delay_secs != 0) {
+			annotate_browser__calc_percent(self, evidx);
+			/*
+			 * Current line focus got out of the list of most active
+			 * lines, NULL it so that if TAB|UNTAB is pressed, we
+			 * move to curr_hot (current hottest line).
+			 */
+			if (nd != NULL && RB_EMPTY_NODE(nd))
+				nd = NULL;
+		}
+
+		switch (key) {
+		case K_TIMER:
+			if (timer != NULL)
+				timer(arg);
+
+			if (delay_secs != 0)
+				symbol__annotate_decay_histogram(sym, evidx);
+			continue;
+		case K_TAB:
+			if (nd != NULL) {
+				nd = rb_prev(nd);
+				if (nd == NULL)
+					nd = rb_last(&self->entries);
+			} else
+				nd = self->curr_hot;
+			break;
+		case K_UNTAB:
+			if (nd != NULL) {
+				nd = rb_next(nd);
+				if (nd == NULL)
+					nd = rb_first(&self->entries);
+			} else
+				nd = self->curr_hot;
+			break;
+		case K_F1:
+		case 'h':
+			ui_browser__help_window(&self->b,
+		"UP/DOWN/PGUP\n"
+		"PGDN/SPACE    Navigate\n"
+		"q/ESC/CTRL+C  Exit\n\n"
+		"->            Go to target\n"
+		"<-            Exit\n"
+		"h             Cycle thru hottest instructions\n"
+		"j             Toggle showing jump to target arrows\n"
+		"J             Toggle showing number of jump sources on targets\n"
+		"n             Search next string\n"
+		"o             Toggle disassembler output/simplified view\n"
+		"s             Toggle source code view\n"
+		"/             Search string\n"
+		"?             Search previous string\n");
+			continue;
+		case 'H':
+			nd = self->curr_hot;
+			break;
+		case 's':
+			if (annotate_browser__toggle_source(self))
+				ui_helpline__puts(help);
+			continue;
+		case 'o':
+			self->use_offset = !self->use_offset;
+			if (self->use_offset)
+				self->target_width = self->min_addr_width;
+			else
+				self->target_width = self->max_addr_width;
+update_addr_width:
+			self->addr_width = self->target_width;
+			if (self->show_nr_jumps)
+				self->addr_width += self->jumps_width + 1;
+			continue;
+		case 'j':
+			self->jump_arrows = !self->jump_arrows;
+			continue;
+		case 'J':
+			self->show_nr_jumps = !self->show_nr_jumps;
+			goto update_addr_width;
+		case '/':
+			if (annotate_browser__search(self, delay_secs)) {
+show_help:
+				ui_helpline__puts(help);
+			}
+			continue;
+		case 'n':
+			if (self->searching_backwards ?
+			    annotate_browser__continue_search_reverse(self, delay_secs) :
+			    annotate_browser__continue_search(self, delay_secs))
+				goto show_help;
+			continue;
+		case '?':
+			if (annotate_browser__search_reverse(self, delay_secs))
+				goto show_help;
+			continue;
+		case K_ENTER:
+		case K_RIGHT:
+			if (self->selection == NULL)
+				ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org");
+			else if (self->selection->offset == -1)
+				ui_helpline__puts("Actions are only available for assembly lines.");
+			else if (!self->selection->ins) {
+				if (strcmp(self->selection->name, "retq"))
+					goto show_sup_ins;
+				goto out;
+			} else if (!(annotate_browser__jump(self) ||
+				     annotate_browser__callq(self, evidx, timer, arg, delay_secs))) {
+show_sup_ins:
+				ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
+			}
+			continue;
+		case K_LEFT:
+		case K_ESC:
+		case 'q':
+		case CTRL('c'):
+			goto out;
+		default:
+			continue;
+		}
+
+		if (nd != NULL)
+			annotate_browser__set_rb_top(self, nd);
+	}
+out:
+	ui_browser__hide(&self->b);
+	return key;
+}
+
+int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
+			     void(*timer)(void *arg), void *arg, int delay_secs)
+{
+	return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx,
+				    timer, arg, delay_secs);
+}
+
+static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
+						size_t size)
+{
+	u64 offset;
+
+	for (offset = 0; offset < size; ++offset) {
+		struct disasm_line *dl = browser->offsets[offset], *dlt;
+		struct browser_disasm_line *bdlt;
+
+		if (!dl || !dl->ins || !ins__is_jump(dl->ins) ||
+		    !disasm_line__has_offset(dl))
+			continue;
+
+		if (dl->ops.target.offset >= size) {
+			ui__error("jump to after symbol!\n"
+				  "size: %zx, jump target: %" PRIx64,
+				  size, dl->ops.target.offset);
+			continue;
+		}
+
+		dlt = browser->offsets[dl->ops.target.offset];
+		/*
+		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
+		 * have to adjust to the previous offset?
+		 */
+		if (dlt == NULL)
+			continue;
+
+		bdlt = disasm_line__browser(dlt);
+		if (++bdlt->jump_sources > browser->max_jump_sources)
+			browser->max_jump_sources = bdlt->jump_sources;
+
+		++browser->nr_jumps;
+	}
+}
+
+static inline int width_jumps(int n)
+{
+	if (n >= 100)
+		return 5;
+	if (n / 10)
+		return 2;
+	return 1;
+}
+
+int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
+			 void(*timer)(void *arg), void *arg,
+			 int delay_secs)
+{
+	struct disasm_line *pos, *n;
+	struct annotation *notes;
+	const size_t size = symbol__size(sym);
+	struct map_symbol ms = {
+		.map = map,
+		.sym = sym,
+	};
+	struct annotate_browser browser = {
+		.b = {
+			.refresh = annotate_browser__refresh,
+			.seek	 = ui_browser__list_head_seek,
+			.write	 = annotate_browser__write,
+			.filter  = disasm_line__filter,
+			.priv	 = &ms,
+			.use_navkeypressed = true,
+		},
+		.use_offset = true,
+		.jump_arrows = true,
+	};
+	int ret = -1;
+
+	if (sym == NULL)
+		return -1;
+
+	if (map->dso->annotate_warned)
+		return -1;
+
+	browser.offsets = zalloc(size * sizeof(struct disasm_line *));
+	if (browser.offsets == NULL) {
+		ui__error("Not enough memory!");
+		return -1;
+	}
+
+	if (symbol__annotate(sym, map, sizeof(struct browser_disasm_line)) < 0) {
+		ui__error("%s", ui_helpline__last_msg);
+		goto out_free_offsets;
+	}
+
+	ui_helpline__push("Press <- or ESC to exit");
+
+	notes = symbol__annotation(sym);
+	browser.start = map__rip_2objdump(map, sym->start);
+
+	list_for_each_entry(pos, &notes->src->source, node) {
+		struct browser_disasm_line *bpos;
+		size_t line_len = strlen(pos->line);
+
+		if (browser.b.width < line_len)
+			browser.b.width = line_len;
+		bpos = disasm_line__browser(pos);
+		bpos->idx = browser.nr_entries++;
+		if (pos->offset != -1) {
+			bpos->idx_asm = browser.nr_asm_entries++;
+			/*
+			 * FIXME: short term bandaid to cope with assembly
+			 * routines that comes with labels in the same column
+			 * as the address in objdump, sigh.
+			 *
+			 * E.g. copy_user_generic_unrolled
+			 */
+			if (pos->offset < (s64)size)
+				browser.offsets[pos->offset] = pos;
+		} else
+			bpos->idx_asm = -1;
+	}
+
+	annotate_browser__mark_jump_targets(&browser, size);
+
+	browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
+	browser.max_addr_width = hex_width(sym->end);
+	browser.jumps_width = width_jumps(browser.max_jump_sources);
+	browser.b.nr_entries = browser.nr_entries;
+	browser.b.entries = &notes->src->source;
+	browser.b.width += 18; /* Percentage */
+	ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs);
+	list_for_each_entry_safe(pos, n, &notes->src->source, node) {
+		list_del(&pos->node);
+		disasm_line__free(pos);
+	}
+
+out_free_offsets:
+	free(browser.offsets);
+	return ret;
+}
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
similarity index 98%
rename from tools/perf/util/ui/browsers/hists.c
rename to tools/perf/ui/browsers/hists.c
index 2f83e5d..a372a4b 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -5,12 +5,12 @@
 #include <newt.h>
 #include <linux/rbtree.h>
 
-#include "../../evsel.h"
-#include "../../evlist.h"
-#include "../../hist.h"
-#include "../../pstack.h"
-#include "../../sort.h"
-#include "../../util.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/hist.h"
+#include "../../util/pstack.h"
+#include "../../util/sort.h"
+#include "../../util/util.h"
 
 #include "../browser.h"
 #include "../helpline.h"
@@ -840,10 +840,14 @@
 	int printed;
 	const struct dso *dso = self->dso_filter;
 	const struct thread *thread = self->thread_filter;
-	unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
+	unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = self->stats.total_period;
 
-	nr_events = convert_unit(nr_events, &unit);
-	printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
+	nr_samples = convert_unit(nr_samples, &unit);
+	printed = scnprintf(bf, size,
+			   "Samples: %lu%c of event '%s', Event count (approx.): %lu",
+			   nr_samples, unit, ev_name, nr_events);
 
 	if (self->uid_filter_str)
 		printed += snprintf(bf + printed, size - printed,
@@ -937,7 +941,7 @@
 			goto zoom_dso;
 		case 't':
 			goto zoom_thread;
-		case 's':
+		case '/':
 			if (ui_browser__input_window("Symbol to show",
 					"Please enter the name of symbol you want to see",
 					buf, "ENTER: OK, ESC: Cancel",
@@ -965,7 +969,7 @@
 					"E             Expand all callchains\n"
 					"d             Zoom into current DSO\n"
 					"t             Zoom into current Thread\n"
-					"s             Filter symbol by name");
+					"/             Filter symbol by name");
 			continue;
 		case K_ENTER:
 		case K_RIGHT:
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
similarity index 97%
rename from tools/perf/util/ui/browsers/map.c
rename to tools/perf/ui/browsers/map.c
index eca6575..98851d5 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -5,9 +5,9 @@
 #include <sys/ttydefaults.h>
 #include <string.h>
 #include <linux/bitops.h>
-#include "../../util.h"
-#include "../../debug.h"
-#include "../../symbol.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
 #include "../browser.h"
 #include "../helpline.h"
 #include "map.h"
diff --git a/tools/perf/util/ui/browsers/map.h b/tools/perf/ui/browsers/map.h
similarity index 100%
rename from tools/perf/util/ui/browsers/map.h
rename to tools/perf/ui/browsers/map.h
diff --git a/tools/perf/util/gtk/browser.c b/tools/perf/ui/gtk/browser.c
similarity index 84%
rename from tools/perf/util/gtk/browser.c
rename to tools/perf/ui/gtk/browser.c
index 258352a..0656c38 100644
--- a/tools/perf/util/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -9,24 +9,13 @@
 
 #define MAX_COLUMNS			32
 
-void perf_gtk_setup_browser(int argc, const char *argv[],
-			    bool fallback_to_pager __used)
-{
-	gtk_init(&argc, (char ***)&argv);
-}
-
-void perf_gtk_exit_browser(bool wait_for_ok __used)
-{
-	gtk_main_quit();
-}
-
-static void perf_gtk_signal(int sig)
+static void perf_gtk__signal(int sig)
 {
 	psignal(sig, "perf");
 	gtk_main_quit();
 }
 
-static void perf_gtk_resize_window(GtkWidget *window)
+static void perf_gtk__resize_window(GtkWidget *window)
 {
 	GdkRectangle rect;
 	GdkScreen *screen;
@@ -46,7 +35,7 @@
 	gtk_window_resize(GTK_WINDOW(window), width, height);
 }
 
-static void perf_gtk_show_hists(GtkWidget *window, struct hists *hists)
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 {
 	GType col_types[MAX_COLUMNS];
 	GtkCellRenderer *renderer;
@@ -142,11 +131,11 @@
 	GtkWidget *notebook;
 	GtkWidget *window;
 
-	signal(SIGSEGV, perf_gtk_signal);
-	signal(SIGFPE,  perf_gtk_signal);
-	signal(SIGINT,  perf_gtk_signal);
-	signal(SIGQUIT, perf_gtk_signal);
-	signal(SIGTERM, perf_gtk_signal);
+	signal(SIGSEGV, perf_gtk__signal);
+	signal(SIGFPE,  perf_gtk__signal);
+	signal(SIGINT,  perf_gtk__signal);
+	signal(SIGQUIT, perf_gtk__signal);
+	signal(SIGTERM, perf_gtk__signal);
 
 	window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
 
@@ -168,7 +157,7 @@
 							GTK_POLICY_AUTOMATIC,
 							GTK_POLICY_AUTOMATIC);
 
-		perf_gtk_show_hists(scrolled_window, hists);
+		perf_gtk__show_hists(scrolled_window, hists);
 
 		tab_label = gtk_label_new(evname);
 
@@ -179,7 +168,7 @@
 
 	gtk_widget_show_all(window);
 
-	perf_gtk_resize_window(window);
+	perf_gtk__resize_window(window);
 
 	gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
 
diff --git a/tools/perf/util/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
similarity index 100%
rename from tools/perf/util/gtk/gtk.h
rename to tools/perf/ui/gtk/gtk.h
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
new file mode 100644
index 0000000..8295299
--- /dev/null
+++ b/tools/perf/ui/gtk/setup.c
@@ -0,0 +1,12 @@
+#include "gtk.h"
+#include "../../util/cache.h"
+
+int perf_gtk__init(void)
+{
+	return gtk_init_check(NULL, NULL) ? 0 : -1;
+}
+
+void perf_gtk__exit(bool wait_for_ok __used)
+{
+	gtk_main_quit();
+}
diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/ui/helpline.c
similarity index 100%
rename from tools/perf/util/ui/helpline.c
rename to tools/perf/ui/helpline.c
diff --git a/tools/perf/util/ui/helpline.h b/tools/perf/ui/helpline.h
similarity index 100%
rename from tools/perf/util/ui/helpline.h
rename to tools/perf/ui/helpline.h
diff --git a/tools/perf/util/ui/keysyms.h b/tools/perf/ui/keysyms.h
similarity index 100%
rename from tools/perf/util/ui/keysyms.h
rename to tools/perf/ui/keysyms.h
diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/ui/libslang.h
similarity index 100%
rename from tools/perf/util/ui/libslang.h
rename to tools/perf/ui/libslang.h
diff --git a/tools/perf/util/ui/progress.c b/tools/perf/ui/progress.c
similarity index 100%
rename from tools/perf/util/ui/progress.c
rename to tools/perf/ui/progress.c
diff --git a/tools/perf/util/ui/progress.h b/tools/perf/ui/progress.h
similarity index 100%
rename from tools/perf/util/ui/progress.h
rename to tools/perf/ui/progress.h
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
new file mode 100644
index 0000000..9f5f888
--- /dev/null
+++ b/tools/perf/ui/setup.c
@@ -0,0 +1,45 @@
+#include "../cache.h"
+#include "../debug.h"
+
+
+void setup_browser(bool fallback_to_pager)
+{
+	if (!isatty(1) || dump_trace)
+		use_browser = 0;
+
+	/* default to TUI */
+	if (use_browser < 0)
+		use_browser = 1;
+
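+	/* 2 = GTK, 1 = TUI, anything else falls back to the pager */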
+	switch (use_browser) {
+	case 2:
+		if (perf_gtk__init() == 0)
+			break;
+		/* fall through */
+	case 1:
+		use_browser = 1;
+		if (ui__init() == 0)
+			break;
+		/* fall through */
+	default:
+		if (fallback_to_pager)
+			setup_pager();
+		break;
+	}
+}
+
+void exit_browser(bool wait_for_ok)
+{
+	switch (use_browser) {
+	case 2:
+		perf_gtk__exit(wait_for_ok);
+		break;
+
+	case 1:
+		ui__exit(wait_for_ok);
+		break;
+
+	default:
+		break;
+	}
+}
diff --git a/tools/perf/util/ui/setup.c b/tools/perf/ui/tui/setup.c
similarity index 74%
rename from tools/perf/util/ui/setup.c
rename to tools/perf/ui/tui/setup.c
index 85a69fa..d33e943 100644
--- a/tools/perf/util/ui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -2,14 +2,14 @@
 #include <signal.h>
 #include <stdbool.h>
 
-#include "../cache.h"
-#include "../debug.h"
-#include "browser.h"
-#include "helpline.h"
-#include "ui.h"
-#include "util.h"
-#include "libslang.h"
-#include "keysyms.h"
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+#include "../keysyms.h"
 
 pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -93,45 +93,26 @@
 	newtResume();
 }
 
-static int ui__init(void)
-{
-	int err = SLkp_init();
-
-	if (err < 0)
-		goto out;
-
-	SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
-out:
-	return err;
-}
-
-static void ui__exit(void)
-{
-	SLtt_set_cursor_visibility(1);
-	SLsmg_refresh();
-	SLsmg_reset_smg();
-	SLang_reset_tty();
-}
-
 static void ui__signal(int sig)
 {
-	ui__exit();
+	ui__exit(false);
 	psignal(sig, "perf");
 	exit(0);
 }
 
-void setup_browser(bool fallback_to_pager)
+int ui__init(void)
 {
-	if (!isatty(1) || !use_browser || dump_trace) {
-		use_browser = 0;
-		if (fallback_to_pager)
-			setup_pager();
-		return;
+	int err;
+
+	newtInit();
+	err = SLkp_init();
+	if (err < 0) {
+		pr_err("TUI initialization failed.\n");
+		goto out;
 	}
 
-	use_browser = 1;
-	newtInit();
-	ui__init();
+	SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
+
 	newtSetSuspendCallback(newt_suspend, NULL);
 	ui_helpline__init();
 	ui_browser__init();
@@ -141,15 +122,19 @@
 	signal(SIGINT, ui__signal);
 	signal(SIGQUIT, ui__signal);
 	signal(SIGTERM, ui__signal);
+out:
+	return err;
 }
 
-void exit_browser(bool wait_for_ok)
+void ui__exit(bool wait_for_ok)
 {
-	if (use_browser > 0) {
-		if (wait_for_ok)
-			ui__question_window("Fatal Error",
-					    ui_helpline__last_msg,
-					    "Press any key...", 0);
-		ui__exit();
-	}
+	if (wait_for_ok)
+		ui__question_window("Fatal Error",
+				    ui_helpline__last_msg,
+				    "Press any key...", 0);
+
+	SLtt_set_cursor_visibility(1);
+	SLsmg_refresh();
+	SLsmg_reset_smg();
+	SLang_reset_tty();
 }
diff --git a/tools/perf/util/ui/ui.h b/tools/perf/ui/ui.h
similarity index 100%
rename from tools/perf/util/ui/ui.h
rename to tools/perf/ui/ui.h
diff --git a/tools/perf/util/ui/util.c b/tools/perf/ui/util.c
similarity index 100%
rename from tools/perf/util/ui/util.c
rename to tools/perf/ui/util.c
diff --git a/tools/perf/util/ui/util.h b/tools/perf/ui/util.h
similarity index 100%
rename from tools/perf/util/ui/util.h
rename to tools/perf/ui/util.h
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 08c6d13..8069dfb 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -18,6 +18,403 @@
 
 const char 	*disassembler_style;
 
+static struct ins *ins__find(const char *name);
+static int disasm_line__parse(char *line, char **namep, char **rawp);
+
+static void ins__delete(struct ins_operands *ops)
+{
+	free(ops->source.raw);
+	free(ops->source.name);
+	free(ops->target.raw);
+	free(ops->target.name);
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+			      struct ins_operands *ops)
+{
+	return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+		  struct ins_operands *ops)
+{
+	if (ins->ops->scnprintf)
+		return ins->ops->scnprintf(ins, bf, size, ops);
+
+	return ins__raw_scnprintf(ins, bf, size, ops);
+}
+
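+/*
+ * Direct calls come as "<addr> <sym>"; "*<addr>" is an indirect call, and a
+ * "(%reg)" target can't be resolved statically, so its address is zeroed.
+ */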
+static int call__parse(struct ins_operands *ops)
+{
+	char *endptr, *tok, *name;
+
+	ops->target.addr = strtoull(ops->raw, &endptr, 16);
+
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		goto indirect_call;
+
+	name++;
+
+	tok = strchr(name, '>');
+	if (tok == NULL)
+		return -1;
+
+	*tok = '\0';
+	ops->target.name = strdup(name);
+	*tok = '>';
+
+	return ops->target.name == NULL ? -1 : 0;
+
+indirect_call:
+	tok = strchr(endptr, '(');
+	if (tok != NULL) {
+		ops->target.addr = 0;
+		return 0;
+	}
+
+	tok = strchr(endptr, '*');
+	if (tok == NULL)
+		return -1;
+
+	ops->target.addr = strtoull(tok + 1, NULL, 16);
+	return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	if (ops->target.name)
+		return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name);
+
+	if (ops->target.addr == 0)
+		return ins__raw_scnprintf(ins, bf, size, ops);
+
+	return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr);
+}
+
+static struct ins_ops call_ops = {
+	.parse	   = call__parse,
+	.scnprintf = call__scnprintf,
+};
+
+bool ins__is_call(const struct ins *ins)
+{
+	return ins->ops == &call_ops;
+}
+
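+/* Jump operands look like "4e8 <sym+0x4e8>": the offset follows the '+' */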
+static int jump__parse(struct ins_operands *ops)
+{
+	const char *s = strchr(ops->raw, '+');
+
+	ops->target.addr = strtoll(ops->raw, NULL, 16);
+
+	if (s++ != NULL)
+		ops->target.offset = strtoll(s, NULL, 16);
+	else
+		ops->target.offset = UINT64_MAX;
+
+	return 0;
+}
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
+}
+
+static struct ins_ops jump_ops = {
+	.parse	   = jump__parse,
+	.scnprintf = jump__scnprintf,
+};
+
+bool ins__is_jump(const struct ins *ins)
+{
+	return ins->ops == &jump_ops;
+}
+
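+/*
+ * objdump tags RIP-relative operands with a "# <addr> <sym>" comment; pull
+ * the address and symbol name out of it.
+ */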
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+	char *endptr, *name, *t;
+
+	if (strstr(raw, "(%rip)") == NULL)
+		return 0;
+
+	*addrp = strtoull(comment, &endptr, 16);
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		return -1;
+
+	name++;
+
+	t = strchr(name, '>');
+	if (t == NULL)
+		return 0;
+
+	*t = '\0';
+	*namep = strdup(name);
+	*t = '>';
+
+	return 0;
+}
+
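+/*
+ * "lock" is only a prefix: parse the instruction it guards and keep that
+ * instruction's ins/ops in ops->locked so printing can delegate to it.
+ */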
+static int lock__parse(struct ins_operands *ops)
+{
+	char *name;
+
+	ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+	if (ops->locked.ops == NULL)
+		return 0;
+
+	if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
+		goto out_free_ops;
+
+	ops->locked.ins = ins__find(name);
+	if (ops->locked.ins == NULL)
+		goto out_free_ops;
+
+	if (!ops->locked.ins->ops)
+		return 0;
+
+	if (ops->locked.ins->ops->parse)
+		ops->locked.ins->ops->parse(ops->locked.ops);
+
+	return 0;
+
+out_free_ops:
+	free(ops->locked.ops);
+	ops->locked.ops = NULL;
+	return 0;
+}
+
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	int printed;
+
+	if (ops->locked.ins == NULL)
+		return ins__raw_scnprintf(ins, bf, size, ops);
+
+	printed = scnprintf(bf, size, "%-6.6s ", ins->name);
+	return printed + ins__scnprintf(ops->locked.ins, bf + printed,
+					size - printed, ops->locked.ops);
+}
+
+static void lock__delete(struct ins_operands *ops)
+{
+	free(ops->locked.ops);
+	free(ops->target.raw);
+	free(ops->target.name);
+}
+
+static struct ins_ops lock_ops = {
+	.free	   = lock__delete,
+	.parse	   = lock__parse,
+	.scnprintf = lock__scnprintf,
+};
+
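+/* mov-like operands come as "<source>,<target>[ # comment]" */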
+static int mov__parse(struct ins_operands *ops)
+{
+	char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+
+	while (s[0] != '\0' && !isspace(s[0]))
+		++s;
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	comment = strchr(s, '#');
+	if (comment == NULL)
+		return 0;
+
+	while (comment[0] != '\0' && isspace(comment[0]))
+		++comment;
+
+	comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name);
+	comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+
+	return 0;
+
+out_free_source:
+	free(ops->source.raw);
+	ops->source.raw = NULL;
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	return scnprintf(bf, size, "%-6.6s %s,%s", ins->name,
+			 ops->source.name ?: ops->source.raw,
+			 ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops mov_ops = {
+	.parse	   = mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
+static int dec__parse(struct ins_operands *ops)
+{
+	char *target, *comment, *s, prev;
+
+	target = s = ops->raw;
+
+	while (s[0] != '\0' && !isspace(s[0]))
+		++s;
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		return -1;
+
+	comment = strchr(s, '#');
+	if (comment == NULL)
+		return 0;
+
+	while (comment[0] != '\0' && isspace(comment[0]))
+		++comment;
+
+	comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+
+	return 0;
+}
+
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	return scnprintf(bf, size, "%-6.6s %s", ins->name,
+			 ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops dec_ops = {
+	.parse	   = dec__parse,
+	.scnprintf = dec__scnprintf,
+};
+
+static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size,
+			  struct ins_operands *ops __used)
+{
+	return scnprintf(bf, size, "%-6.6s", "nop");
+}
+
+static struct ins_ops nop_ops = {
+	.scnprintf = nop__scnprintf,
+};
+
+/*
+ * Must be sorted by name!
+ */
+static struct ins instructions[] = {
+	{ .name = "add",   .ops  = &mov_ops, },
+	{ .name = "addl",  .ops  = &mov_ops, },
+	{ .name = "addq",  .ops  = &mov_ops, },
+	{ .name = "addw",  .ops  = &mov_ops, },
+	{ .name = "and",   .ops  = &mov_ops, },
+	{ .name = "bts",   .ops  = &mov_ops, },
+	{ .name = "call",  .ops  = &call_ops, },
+	{ .name = "callq", .ops  = &call_ops, },
+	{ .name = "cmp",   .ops  = &mov_ops, },
+	{ .name = "cmpb",  .ops  = &mov_ops, },
+	{ .name = "cmpl",  .ops  = &mov_ops, },
+	{ .name = "cmpq",  .ops  = &mov_ops, },
+	{ .name = "cmpw",  .ops  = &mov_ops, },
+	{ .name = "cmpxch", .ops  = &mov_ops, },
+	{ .name = "dec",   .ops  = &dec_ops, },
+	{ .name = "decl",  .ops  = &dec_ops, },
+	{ .name = "imul",  .ops  = &mov_ops, },
+	{ .name = "inc",   .ops  = &dec_ops, },
+	{ .name = "incl",  .ops  = &dec_ops, },
+	{ .name = "ja",	   .ops  = &jump_ops, },
+	{ .name = "jae",   .ops  = &jump_ops, },
+	{ .name = "jb",	   .ops  = &jump_ops, },
+	{ .name = "jbe",   .ops  = &jump_ops, },
+	{ .name = "jc",	   .ops  = &jump_ops, },
+	{ .name = "jcxz",  .ops  = &jump_ops, },
+	{ .name = "je",	   .ops  = &jump_ops, },
+	{ .name = "jecxz", .ops  = &jump_ops, },
+	{ .name = "jg",	   .ops  = &jump_ops, },
+	{ .name = "jge",   .ops  = &jump_ops, },
+	{ .name = "jl",    .ops  = &jump_ops, },
+	{ .name = "jle",   .ops  = &jump_ops, },
+	{ .name = "jmp",   .ops  = &jump_ops, },
+	{ .name = "jmpq",  .ops  = &jump_ops, },
+	{ .name = "jna",   .ops  = &jump_ops, },
+	{ .name = "jnae",  .ops  = &jump_ops, },
+	{ .name = "jnb",   .ops  = &jump_ops, },
+	{ .name = "jnbe",  .ops  = &jump_ops, },
+	{ .name = "jnc",   .ops  = &jump_ops, },
+	{ .name = "jne",   .ops  = &jump_ops, },
+	{ .name = "jng",   .ops  = &jump_ops, },
+	{ .name = "jnge",  .ops  = &jump_ops, },
+	{ .name = "jnl",   .ops  = &jump_ops, },
+	{ .name = "jnle",  .ops  = &jump_ops, },
+	{ .name = "jno",   .ops  = &jump_ops, },
+	{ .name = "jnp",   .ops  = &jump_ops, },
+	{ .name = "jns",   .ops  = &jump_ops, },
+	{ .name = "jnz",   .ops  = &jump_ops, },
+	{ .name = "jo",	   .ops  = &jump_ops, },
+	{ .name = "jp",	   .ops  = &jump_ops, },
+	{ .name = "jpe",   .ops  = &jump_ops, },
+	{ .name = "jpo",   .ops  = &jump_ops, },
+	{ .name = "jrcxz", .ops  = &jump_ops, },
+	{ .name = "js",	   .ops  = &jump_ops, },
+	{ .name = "jz",	   .ops  = &jump_ops, },
+	{ .name = "lea",   .ops  = &mov_ops, },
+	{ .name = "lock",  .ops  = &lock_ops, },
+	{ .name = "mov",   .ops  = &mov_ops, },
+	{ .name = "movb",  .ops  = &mov_ops, },
+	{ .name = "movdqa",.ops  = &mov_ops, },
+	{ .name = "movl",  .ops  = &mov_ops, },
+	{ .name = "movq",  .ops  = &mov_ops, },
+	{ .name = "movslq", .ops  = &mov_ops, },
+	{ .name = "movzbl", .ops  = &mov_ops, },
+	{ .name = "movzwl", .ops  = &mov_ops, },
+	{ .name = "nop",   .ops  = &nop_ops, },
+	{ .name = "nopl",  .ops  = &nop_ops, },
+	{ .name = "nopw",  .ops  = &nop_ops, },
+	{ .name = "or",    .ops  = &mov_ops, },
+	{ .name = "orl",   .ops  = &mov_ops, },
+	{ .name = "test",  .ops  = &mov_ops, },
+	{ .name = "testb", .ops  = &mov_ops, },
+	{ .name = "testl", .ops  = &mov_ops, },
+	{ .name = "xadd",  .ops  = &mov_ops, },
+};
+
+static int ins__cmp(const void *name, const void *insp)
+{
+	const struct ins *ins = insp;
+
+	return strcmp(name, ins->name);
+}
+
+static struct ins *ins__find(const char *name)
+{
+	const int nmemb = ARRAY_SIZE(instructions);
+
+	return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp);
+}
+
 int symbol__annotate_init(struct map *map __used, struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
@@ -28,7 +425,7 @@
 int symbol__alloc_hist(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	const size_t size = sym->end - sym->start + 1;
+	const size_t size = symbol__size(sym);
 	size_t sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64));
 
 	notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist);
@@ -78,31 +475,110 @@
 	return 0;
 }
 
-static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
+static void disasm_line__init_ins(struct disasm_line *dl)
 {
-	struct objdump_line *self = malloc(sizeof(*self) + privsize);
+	dl->ins = ins__find(dl->name);
 
-	if (self != NULL) {
-		self->offset = offset;
-		self->line = line;
+	if (dl->ins == NULL)
+		return;
+
+	if (!dl->ins->ops)
+		return;
+
+	if (dl->ins->ops->parse)
+		dl->ins->ops->parse(&dl->ops);
+}
+
+static int disasm_line__parse(char *line, char **namep, char **rawp)
+{
+	char *name = line, tmp;
+
+	while (isspace(name[0]))
+		++name;
+
+	if (name[0] == '\0')
+		return -1;
+
+	*rawp = name + 1;
+
+	while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+		++*rawp;
+
+	tmp = (*rawp)[0];
+	(*rawp)[0] = '\0';
+	*namep = strdup(name);
+
+	if (*namep == NULL)
+		goto out_free_name;
+
+	(*rawp)[0] = tmp;
+
+	if ((*rawp)[0] != '\0') {
+		(*rawp)++;
+		while (isspace((*rawp)[0]))
+			++(*rawp);
 	}
 
-	return self;
+	return 0;
+
+out_free_name:
+	free(*namep);
+	*namep = NULL;
+	return -1;
 }
 
-void objdump_line__free(struct objdump_line *self)
+static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize)
 {
-	free(self->line);
-	free(self);
+	struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
+
+	if (dl != NULL) {
+		dl->offset = offset;
+		dl->line = strdup(line);
+		if (dl->line == NULL)
+			goto out_delete;
+
+		if (offset != -1) {
+			if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
+				goto out_free_line;
+
+			disasm_line__init_ins(dl);
+		}
+	}
+
+	return dl;
+
+out_free_line:
+	free(dl->line);
+out_delete:
+	free(dl);
+	return NULL;
 }
 
-static void objdump__add_line(struct list_head *head, struct objdump_line *line)
+void disasm_line__free(struct disasm_line *dl)
+{
+	free(dl->line);
+	free(dl->name);
+	if (dl->ins && dl->ins->ops->free)
+		dl->ins->ops->free(&dl->ops);
+	else
+		ins__delete(&dl->ops);
+	free(dl);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
+{
+	if (raw || !dl->ins)
+		return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw);
+
+	return ins__scnprintf(dl->ins, bf, size, &dl->ops);
+}
+
+static void disasm__add(struct list_head *head, struct disasm_line *line)
 {
 	list_add_tail(&line->node, head);
 }
 
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
-					       struct objdump_line *pos)
+struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos)
 {
 	list_for_each_entry_continue(pos, head, node)
 		if (pos->offset >= 0)
@@ -111,15 +587,14 @@
 	return NULL;
 }
 
-static int objdump_line__print(struct objdump_line *oline, struct symbol *sym,
-			       int evidx, u64 len, int min_pcnt,
-			       int printed, int max_lines,
-			       struct objdump_line *queue)
+static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
+		      int evidx, u64 len, int min_pcnt, int printed,
+		      int max_lines, struct disasm_line *queue)
 {
 	static const char *prev_line;
 	static const char *prev_color;
 
-	if (oline->offset != -1) {
+	if (dl->offset != -1) {
 		const char *path = NULL;
 		unsigned int hits = 0;
 		double percent = 0.0;
@@ -127,10 +602,11 @@
 		struct annotation *notes = symbol__annotation(sym);
 		struct source_line *src_line = notes->src->lines;
 		struct sym_hist *h = annotation__histogram(notes, evidx);
-		s64 offset = oline->offset;
-		struct objdump_line *next;
+		s64 offset = dl->offset;
+		const u64 addr = start + offset;
+		struct disasm_line *next;
 
-		next = objdump__get_next_ip_line(&notes->src->source, oline);
+		next = disasm__get_next_ip_line(&notes->src->source, dl);
 
 		while (offset < (s64)len &&
 		       (next == NULL || offset < next->offset)) {
@@ -155,9 +631,9 @@
 
 		if (queue != NULL) {
 			list_for_each_entry_from(queue, &notes->src->source, node) {
-				if (queue == oline)
+				if (queue == dl)
 					break;
-				objdump_line__print(queue, sym, evidx, len,
+				disasm_line__print(queue, sym, start, evidx, len,
 						    0, 0, 1, NULL);
 			}
 		}
@@ -180,17 +656,18 @@
 
 		color_fprintf(stdout, color, " %7.2f", percent);
 		printf(" :	");
-		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line);
+		color_fprintf(stdout, PERF_COLOR_MAGENTA, "  %" PRIx64 ":", addr);
+		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line);
 	} else if (max_lines && printed >= max_lines)
 		return 1;
 	else {
 		if (queue)
 			return -1;
 
-		if (!*oline->line)
+		if (!*dl->line)
 			printf("         :\n");
 		else
-			printf("         :	%s\n", oline->line);
+			printf("         :	%s\n", dl->line);
 	}
 
 	return 0;
@@ -200,8 +677,8 @@
 				      FILE *file, size_t privsize)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	struct objdump_line *objdump_line;
-	char *line = NULL, *tmp, *tmp2, *c;
+	struct disasm_line *dl;
+	char *line = NULL, *parsed_line, *tmp, *tmp2, *c;
 	size_t line_len;
 	s64 line_ip, offset = -1;
 
@@ -219,6 +696,7 @@
 		*c = 0;
 
 	line_ip = -1;
+	parsed_line = line;
 
 	/*
 	 * Strip leading spaces:
@@ -246,14 +724,17 @@
 		offset = line_ip - start;
 		if (offset < 0 || (u64)line_ip > end)
 			offset = -1;
+		else
+			parsed_line = tmp2 + 1;
 	}
 
-	objdump_line = objdump_line__new(offset, line, privsize);
-	if (objdump_line == NULL) {
-		free(line);
+	dl = disasm_line__new(offset, parsed_line, privsize);
+	free(line);
+
+	if (dl == NULL)
 		return -1;
-	}
-	objdump__add_line(&notes->src->source, objdump_line);
+
+	disasm__add(&notes->src->source, dl);
 
 	return 0;
 }
@@ -476,7 +957,7 @@
 {
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evidx);
-	u64 len = sym->end - sym->start, offset;
+	u64 len = symbol__size(sym), offset;
 
 	for (offset = 0; offset < len; ++offset)
 		if (h->addr[offset] != 0)
@@ -492,7 +973,8 @@
 	struct dso *dso = map->dso;
 	const char *filename = dso->long_name, *d_filename;
 	struct annotation *notes = symbol__annotation(sym);
-	struct objdump_line *pos, *queue = NULL;
+	struct disasm_line *pos, *queue = NULL;
+	u64 start = map__rip_2objdump(map, sym->start);
 	int printed = 2, queue_len = 0;
 	int more = 0;
 	u64 len;
@@ -502,7 +984,7 @@
 	else
 		d_filename = basename(filename);
 
-	len = sym->end - sym->start;
+	len = symbol__size(sym);
 
 	printf(" Percent |	Source code & Disassembly of %s\n", d_filename);
 	printf("------------------------------------------------\n");
@@ -516,8 +998,9 @@
 			queue_len = 0;
 		}
 
-		switch (objdump_line__print(pos, sym, evidx, len, min_pcnt,
-					    printed, max_lines, queue)) {
+		switch (disasm_line__print(pos, sym, start, evidx, len,
+					    min_pcnt, printed, max_lines,
+					    queue)) {
 		case 0:
 			++printed;
 			if (context) {
@@ -561,7 +1044,7 @@
 {
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evidx);
-	int len = sym->end - sym->start, offset;
+	int len = symbol__size(sym), offset;
 
 	h->sum = 0;
 	for (offset = 0; offset < len; ++offset) {
@@ -570,16 +1053,44 @@
 	}
 }
 
-void objdump_line_list__purge(struct list_head *head)
+void disasm__purge(struct list_head *head)
 {
-	struct objdump_line *pos, *n;
+	struct disasm_line *pos, *n;
 
 	list_for_each_entry_safe(pos, n, head, node) {
 		list_del(&pos->node);
-		objdump_line__free(pos);
+		disasm_line__free(pos);
 	}
 }
 
+static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
+{
+	size_t printed;
+
+	if (dl->offset == -1)
+		return fprintf(fp, "%s\n", dl->line);
+
+	printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->name);
+
+	if (dl->ops.raw[0] != '\0') {
+		printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
+				   dl->ops.raw);
+	}
+
+	return printed + fprintf(fp, "\n");
+}
+
+size_t disasm__fprintf(struct list_head *head, FILE *fp)
+{
+	struct disasm_line *pos;
+	size_t printed = 0;
+
+	list_for_each_entry(pos, head, node)
+		printed += disasm_line__fprintf(pos, fp);
+
+	return printed;
+}
+
 int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
 			 bool print_lines, bool full_paths, int min_pcnt,
 			 int max_lines)
@@ -592,7 +1103,7 @@
 	if (symbol__annotate(sym, map, 0) < 0)
 		return -1;
 
-	len = sym->end - sym->start;
+	len = symbol__size(sym);
 
 	if (print_lines) {
 		symbol__get_source_line(sym, map, evidx, &source_line,
@@ -605,7 +1116,7 @@
 	if (print_lines)
 		symbol__free_source_line(sym, len);
 
-	objdump_line_list__purge(&symbol__annotation(sym)->src->source);
+	disasm__purge(&symbol__annotation(sym)->src->source);
 
 	return 0;
 }
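
The instructions[] table above is kept sorted so that ins__find() can use
bsearch(3) for O(log n) mnemonic lookup -- that is what the "Must be
sorted by name!" comment is guarding. A minimal standalone sketch of the
same idiom, with a toy table and toy struct rather than the perf ones:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct ins { const char *name; };

	/* Toy table -- must stay sorted by name for bsearch() to work. */
	static struct ins instructions[] = {
		{ .name = "add"  },
		{ .name = "call" },
		{ .name = "jmp"  },
		{ .name = "mov"  },
	};

	static int ins__cmp(const void *name, const void *insp)
	{
		const struct ins *ins = insp;

		return strcmp(name, ins->name);
	}

	int main(void)
	{
		const char *key = "jmp";
		struct ins *ins = bsearch(key, instructions,
					  sizeof(instructions) / sizeof(instructions[0]),
					  sizeof(struct ins), ins__cmp);

		printf("%s -> %s\n", key, ins ? "found" : "not found");
		return 0;
	}

An out-of-order entry would silently break lookups, which is why new
mnemonics have to be inserted in order rather than appended.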
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index efa5dc8..78a5692 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -2,20 +2,69 @@
 #define __PERF_ANNOTATE_H
 
 #include <stdbool.h>
+#include <stdint.h>
 #include "types.h"
 #include "symbol.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
 
-struct objdump_line {
-	struct list_head node;
-	s64		 offset;
-	char		 *line;
+struct ins;
+
+struct ins_operands {
+	char	*raw;
+	struct {
+		char	*raw;
+		char	*name;
+		u64	addr;
+		u64	offset;
+	} target;
+	union {
+		struct {
+			char	*raw;
+			char	*name;
+			u64	addr;
+		} source;
+		struct {
+			struct ins *ins;
+			struct ins_operands *ops;
+		} locked;
+	};
 };
 
-void objdump_line__free(struct objdump_line *self);
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
-					       struct objdump_line *pos);
+struct ins_ops {
+	void (*free)(struct ins_operands *ops);
+	int (*parse)(struct ins_operands *ops);
+	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+			 struct ins_operands *ops);
+};
+
+struct ins {
+	const char     *name;
+	struct ins_ops *ops;
+};
+
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_call(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+
+struct disasm_line {
+	struct list_head    node;
+	s64		    offset;
+	char		    *line;
+	char		    *name;
+	struct ins	    *ins;
+	struct ins_operands ops;
+};
+
+static inline bool disasm_line__has_offset(const struct disasm_line *dl)
+{
+	return dl->ops.target.offset != UINT64_MAX;
+}
+
+void disasm_line__free(struct disasm_line *dl);
+struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos);
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
+size_t disasm__fprintf(struct list_head *head, FILE *fp);
 
 struct sym_hist {
 	u64		sum;
@@ -32,7 +81,7 @@
  *
  * @histogram: Array of addr hit histograms per event being monitored
  * @lines: If 'print_lines' is specified, per source code line percentages
- * @source: source parsed from objdump -dS
+ * @source: source parsed from a disassembler like objdump -dS
  *
  * lines is allocated, percentages calculated and all sorted by percentage
  * when the annotation is about to be presented, so the percentages are for
@@ -82,7 +131,7 @@
 			    int context);
 void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
 void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
-void objdump_line_list__purge(struct list_head *head);
+void disasm__purge(struct list_head *head);
 
 int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
 			 bool print_lines, bool full_paths, int min_pcnt,
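
The ins_ops vtable above lets each instruction class override how its
operands are rendered, with disasm_line__scnprintf() falling back to the
raw operand string when no ops are known. A hedged sketch of that
dispatch pattern -- toy types, and snprintf() standing in for perf's
scnprintf():

	#include <stdio.h>

	struct ins;

	struct ins_ops {
		int (*scnprintf)(struct ins *ins, char *bf, size_t size,
				 const char *raw);
	};

	struct ins {
		const char     *name;
		struct ins_ops *ops;
	};

	static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
				   const char *raw)
	{
		return snprintf(bf, size, "%-6.6s -> %s", ins->name, raw);
	}

	static struct ins_ops jump_ops = { .scnprintf = jump__scnprintf };

	/* Mirrors the raw fallback in disasm_line__scnprintf(). */
	static int line__scnprintf(struct ins *ins, char *bf, size_t size,
				   const char *raw)
	{
		if (ins == NULL || ins->ops == NULL || ins->ops->scnprintf == NULL)
			return snprintf(bf, size, "%-6.6s %s", "??", raw);

		return ins->ops->scnprintf(ins, bf, size, raw);
	}

	int main(void)
	{
		struct ins jmp = { .name = "jmp", .ops = &jump_ops };
		char bf[64];

		line__scnprintf(&jmp, bf, sizeof(bf), "4005a0 <main+0x10>");
		printf("%s\n", bf);
		return 0;
	}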
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 8dd224d..cff18c6 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -33,7 +33,7 @@
 
 extern int use_browser;
 
-#ifdef NO_NEWT_SUPPORT
+#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
 static inline void setup_browser(bool fallback_to_pager)
 {
 	if (fallback_to_pager)
@@ -43,19 +43,29 @@
 #else
 void setup_browser(bool fallback_to_pager);
 void exit_browser(bool wait_for_ok);
+
+#ifdef NO_NEWT_SUPPORT
+static inline int ui__init(void)
+{
+	return -1;
+}
+static inline void ui__exit(bool wait_for_ok __used) {}
+#else
+int ui__init(void);
+void ui__exit(bool wait_for_ok);
 #endif
 
 #ifdef NO_GTK2_SUPPORT
-static inline void perf_gtk_setup_browser(int argc __used, const char *argv[] __used, bool fallback_to_pager)
+static inline int perf_gtk__init(void)
 {
-	if (fallback_to_pager)
-		setup_pager();
+	return -1;
 }
-static inline void perf_gtk_exit_browser(bool wait_for_ok __used) {}
+static inline void perf_gtk__exit(bool wait_for_ok __used) {}
 #else
-void perf_gtk_setup_browser(int argc, const char *argv[], bool fallback_to_pager);
-void perf_gtk_exit_browser(bool wait_for_ok);
+int perf_gtk__init(void);
+void perf_gtk__exit(bool wait_for_ok);
 #endif
+#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
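
The nesting above means setup_browser() is only stubbed out when both
NEWT and GTK2 are compiled out, while each backend gets an init stub
returning -1 so callers can try the next one. A small sketch of the
compile-time fallback pattern, using made-up HAVE_FOO/HAVE_BAR toggles
rather than perf's config symbols:

	#include <stdio.h>

	/* Flip these to simulate a backend being compiled out. */
	#define HAVE_FOO 0
	#define HAVE_BAR 1

	#if HAVE_FOO
	static int foo__init(void) { return 0; }	/* real UI init */
	#else
	static int foo__init(void) { return -1; }	/* stub: not built */
	#endif

	#if HAVE_BAR
	static int bar__init(void) { return 0; }
	#else
	static int bar__init(void) { return -1; }
	#endif

	static void setup_browser(void)
	{
		if (foo__init() == 0)
			printf("using foo UI\n");
		else if (bar__init() == 0)
			printf("using bar UI\n");
		else
			printf("falling back to the pager\n");
	}

	int main(void) { setup_browser(); return 0; }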
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 26817da..efb1fce 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -11,6 +11,7 @@
 #include "event.h"
 #include "debug.h"
 #include "util.h"
+#include "target.h"
 
 int verbose;
 bool dump_trace = false, quiet = false;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index f2ce88d..6bebe7f 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -26,7 +26,7 @@
 #else
 extern char ui_helpline__last_msg[];
 int ui_helpline__show_help(const char *format, va_list ap);
-#include "ui/progress.h"
+#include "../ui/progress.h"
 int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
 #endif
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1986d80..4ac5f5a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -11,6 +11,7 @@
 #include <poll.h>
 #include "cpumap.h"
 #include "thread_map.h"
+#include "target.h"
 #include "evlist.h"
 #include "evsel.h"
 #include <unistd.h>
@@ -599,18 +600,21 @@
 	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
-			     const char *target_tid, uid_t uid, const char *cpu_list)
+int perf_evlist__create_maps(struct perf_evlist *evlist,
+			     struct perf_target *target)
 {
-	evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
+	evlist->threads = thread_map__new_str(target->pid, target->tid,
+					      target->uid);
 
 	if (evlist->threads == NULL)
 		return -1;
 
-	if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
+	if (perf_target__has_task(target))
+		evlist->cpus = cpu_map__dummy_new();
+	else if (!perf_target__has_cpu(target) && !target->uses_mmap)
 		evlist->cpus = cpu_map__dummy_new();
 	else
-		evlist->cpus = cpu_map__new(cpu_list);
+		evlist->cpus = cpu_map__new(target->cpu_list);
 
 	if (evlist->cpus == NULL)
 		goto out_delete_threads;
@@ -827,7 +831,7 @@
 		exit(-1);
 	}
 
-	if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
+	if (perf_target__none(&opts->target))
 		evlist->threads->map[0] = evlist->workload.pid;
 
 	close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 21f1c9e..58abb63 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -106,8 +106,8 @@
 	evlist->threads	= threads;
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
-			     const char *tid, uid_t uid, const char *cpu_list);
+int perf_evlist__create_maps(struct perf_evlist *evlist,
+			     struct perf_target *target);
 void perf_evlist__delete_maps(struct perf_evlist *evlist);
 int perf_evlist__set_filters(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8c13dbc..d7a2b4b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -14,6 +14,7 @@
 #include "util.h"
 #include "cpumap.h"
 #include "thread_map.h"
+#include "target.h"
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
@@ -106,15 +107,15 @@
 	if (opts->call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
-	if (opts->system_wide)
+	if (opts->target.system_wide)
 		attr->sample_type	|= PERF_SAMPLE_CPU;
 
 	if (opts->period)
 		attr->sample_type	|= PERF_SAMPLE_PERIOD;
 
 	if (!opts->sample_id_all_missing &&
-	    (opts->sample_time || opts->system_wide ||
-	     !opts->no_inherit || opts->cpu_list))
+	    (opts->sample_time || !opts->no_inherit ||
+	     perf_target__has_cpu(&opts->target)))
 		attr->sample_type	|= PERF_SAMPLE_TIME;
 
 	if (opts->raw_samples) {
@@ -135,7 +136,7 @@
 	attr->mmap = track;
 	attr->comm = track;
 
-	if (!opts->target_pid && !opts->target_tid && !opts->system_wide &&
+	if (perf_target__none(&opts->target) &&
 	    (!opts->group || evsel == first)) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c0b70c6..5385980 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -31,21 +31,16 @@
 
 int perf_header__push_event(u64 id, const char *name)
 {
+	struct perf_trace_event_type *nevents;
+
 	if (strlen(name) > MAX_EVENT_NAME)
 		pr_warning("Event %s will be truncated\n", name);
 
-	if (!events) {
-		events = malloc(sizeof(struct perf_trace_event_type));
-		if (events == NULL)
-			return -ENOMEM;
-	} else {
-		struct perf_trace_event_type *nevents;
+	nevents = realloc(events, (event_count + 1) * sizeof(*events));
+	if (nevents == NULL)
+		return -ENOMEM;
+	events = nevents;
 
-		nevents = realloc(events, (event_count + 1) * sizeof(*events));
-		if (nevents == NULL)
-			return -ENOMEM;
-		events = nevents;
-	}
 	memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
 	events[event_count].event_id = id;
 	strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
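
The simplification above works because realloc(NULL, size) is defined to
behave like malloc(size), so the special first-allocation branch was
dead weight; growing through a temporary pointer also avoids leaking the
old array if realloc() fails. A standalone sketch of the grow idiom:

	#include <stdio.h>
	#include <stdlib.h>

	static int *events;
	static int event_count;

	static int push_event(int id)
	{
		/* realloc(NULL, ...) acts as malloc(): no first-time case. */
		int *nevents = realloc(events, (event_count + 1) * sizeof(*events));

		if (nevents == NULL)
			return -1;	/* 'events' is still valid, nothing leaked */
		events = nevents;

		events[event_count++] = id;
		return 0;
	}

	int main(void)
	{
		for (int id = 0; id < 4; id++)
			if (push_event(id))
				return 1;

		for (int i = 0; i < event_count; i++)
			printf("event %d\n", events[i]);
		free(events);
		return 0;
	}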
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9f6d630..1293b5e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -599,7 +599,7 @@
 	if (chain->ms.sym)
 		ret += fprintf(fp, "%s\n", chain->ms.sym->name);
 	else
-		ret += fprintf(fp, "%p\n", (void *)(long)chain->ip);
+		ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip);
 
 	return ret;
 }
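
The one-liner above fixes a real portability problem: casting a u64 ip
through (void *)(long) truncates the address on 32-bit builds, while
PRIx64 from <inttypes.h> prints the full 64-bit value everywhere. A
quick illustration:

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t ip = 0x1234abcd5678ULL;

		/* Truncates wherever long is 32-bit: */
		printf("%p\n", (void *)(long)ip);
		/* Always prints the full value: */
		printf("0x%0" PRIx64 "\n", ip);
		return 0;
	}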
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2cae9df..cfc64e2 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -138,7 +138,7 @@
 #define K_LEFT -1
 #define K_RIGHT -2
 #else
-#include "ui/keysyms.h"
+#include "../ui/keysyms.h"
 int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
 			     void(*timer)(void *arg), void *arg, int delay_secs);
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5b3a0ef..c7fc18a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -593,17 +593,27 @@
 static int config_term(struct perf_event_attr *attr,
 		       struct parse_events__term *term)
 {
-	switch (term->type) {
+#define CHECK_TYPE_VAL(type)					\
+do {								\
+	if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val)	\
+		return -EINVAL;					\
+} while (0)
+
+	switch (term->type_term) {
 	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+		CHECK_TYPE_VAL(NUM);
 		attr->config = term->val.num;
 		break;
 	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+		CHECK_TYPE_VAL(NUM);
 		attr->config1 = term->val.num;
 		break;
 	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+		CHECK_TYPE_VAL(NUM);
 		attr->config2 = term->val.num;
 		break;
 	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+		CHECK_TYPE_VAL(NUM);
 		attr->sample_period = term->val.num;
 		break;
 	case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
@@ -615,7 +625,9 @@
 	default:
 		return -EINVAL;
 	}
+
 	return 0;
+#undef CHECK_TYPE_VAL
 }
 
 static int config_attr(struct perf_event_attr *attr,
@@ -1015,11 +1027,12 @@
 
 int parse_events__is_hardcoded_term(struct parse_events__term *term)
 {
-	return term->type <= PARSE_EVENTS__TERM_TYPE_HARDCODED_MAX;
+	return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
 }
 
-int parse_events__new_term(struct parse_events__term **_term, int type,
-			   char *config, char *str, long num)
+static int new_term(struct parse_events__term **_term, int type_val,
+		    int type_term, char *config,
+		    char *str, long num)
 {
 	struct parse_events__term *term;
 
@@ -1028,15 +1041,11 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&term->list);
-	term->type = type;
+	term->type_val  = type_val;
+	term->type_term = type_term;
 	term->config = config;
 
-	switch (type) {
-	case PARSE_EVENTS__TERM_TYPE_CONFIG:
-	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
-	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
-	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
-	case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+	switch (type_val) {
 	case PARSE_EVENTS__TERM_TYPE_NUM:
 		term->val.num = num;
 		break;
@@ -1051,6 +1060,20 @@
 	return 0;
 }
 
+int parse_events__term_num(struct parse_events__term **term,
+			   int type_term, char *config, long num)
+{
+	return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
+			config, NULL, num);
+}
+
+int parse_events__term_str(struct parse_events__term **term,
+			   int type_term, char *config, char *str)
+{
+	return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
+			config, str, 0);
+}
+
 void parse_events__free_terms(struct list_head *terms)
 {
 	struct parse_events__term *term, *h;
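
The rework above splits what kind of value a term carries (type_val:
NUM or STR) from which setting it configures (type_term: USER,
CONFIG, ...), so that config_term() can reject, say, a string handed to
a numeric term via CHECK_TYPE_VAL. A toy version of that validation,
with made-up names standing in for the PARSE_EVENTS__TERM_TYPE_*
constants:

	#include <stdio.h>

	enum { TYPE_NUM, TYPE_STR };
	enum { TERM_USER, TERM_CONFIG };

	struct term {
		int type_val;	/* NUM or STR */
		int type_term;	/* which setting this term configures */
		union { long num; const char *str; } val;
	};

	#define CHECK_TYPE_VAL(t, expected)		\
	do {						\
		if ((t)->type_val != (expected))	\
			return -1;			\
	} while (0)

	static int config_term(struct term *term, long *config)
	{
		switch (term->type_term) {
		case TERM_CONFIG:
			CHECK_TYPE_VAL(term, TYPE_NUM);
			*config = term->val.num;
			return 0;
		default:
			return -1;
		}
	}

	int main(void)
	{
		struct term bad = { .type_val = TYPE_STR,
				    .type_term = TERM_CONFIG,
				    .val.str = "oops" };
		long config;

		printf("config_term: %d (string for a numeric term)\n",
		       config_term(&bad, &config));
		return 0;
	}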
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ca069f8..3fddd61 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -4,7 +4,9 @@
  * Parse symbolic events/counts passed in as options:
  */
 
+#include <stdbool.h>
 #include "../../../include/linux/perf_event.h"
+#include "types.h"
 
 struct list_head;
 struct perf_evsel;
@@ -34,16 +36,17 @@
 #define EVENTS_HELP_MAX (128*1024)
 
 enum {
+	PARSE_EVENTS__TERM_TYPE_NUM,
+	PARSE_EVENTS__TERM_TYPE_STR,
+};
+
+enum {
+	PARSE_EVENTS__TERM_TYPE_USER,
 	PARSE_EVENTS__TERM_TYPE_CONFIG,
 	PARSE_EVENTS__TERM_TYPE_CONFIG1,
 	PARSE_EVENTS__TERM_TYPE_CONFIG2,
 	PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
 	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
-	PARSE_EVENTS__TERM_TYPE_NUM,
-	PARSE_EVENTS__TERM_TYPE_STR,
-
-	PARSE_EVENTS__TERM_TYPE_HARDCODED_MAX =
-		PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
 };
 
 struct parse_events__term {
@@ -52,14 +55,16 @@
 		char *str;
 		long  num;
 	} val;
-	int type;
-
+	int type_val;
+	int type_term;
 	struct list_head list;
 };
 
 int parse_events__is_hardcoded_term(struct parse_events__term *term);
-int parse_events__new_term(struct parse_events__term **term, int type,
-			   char *config, char *str, long num);
+int parse_events__term_num(struct parse_events__term **_term,
+			   int type_term, char *config, long num);
+int parse_events__term_str(struct parse_events__term **_term,
+			   int type_term, char *config, char *str);
 void parse_events__free_terms(struct list_head *terms);
 int parse_events_modifier(struct list_head *list __used, char *str __used);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d9637da..936913e 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -176,8 +176,8 @@
 {
 	struct parse_events__term *term;
 
-	ABORT_ON(parse_events__new_term(&term, PARSE_EVENTS__TERM_TYPE_STR,
-		 $1, $3, 0));
+	ABORT_ON(parse_events__term_str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3));
 	$$ = term;
 }
 |
@@ -185,8 +185,8 @@
 {
 	struct parse_events__term *term;
 
-	ABORT_ON(parse_events__new_term(&term, PARSE_EVENTS__TERM_TYPE_NUM,
-		 $1, NULL, $3));
+	ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3));
 	$$ = term;
 }
 |
@@ -194,8 +194,8 @@
 {
 	struct parse_events__term *term;
 
-	ABORT_ON(parse_events__new_term(&term, PARSE_EVENTS__TERM_TYPE_NUM,
-		 $1, NULL, 1));
+	ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, 1));
 	$$ = term;
 }
 |
@@ -203,7 +203,7 @@
 {
 	struct parse_events__term *term;
 
-	ABORT_ON(parse_events__new_term(&term, $1, NULL, NULL, $3));
+	ABORT_ON(parse_events__term_num(&term, $1, NULL, $3));
 	$$ = term;
 }
 |
@@ -211,7 +211,7 @@
 {
 	struct parse_events__term *term;
 
-	ABORT_ON(parse_events__new_term(&term, $1, NULL, NULL, 1));
+	ABORT_ON(parse_events__term_num(&term, $1, NULL, 1));
 	$$ = term;
 }
 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index cb08a11..8ee219b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -225,7 +225,7 @@
 	if (parse_events__is_hardcoded_term(term))
 		return 0;
 
-	if (term->type != PARSE_EVENTS__TERM_TYPE_NUM)
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM)
 		return -EINVAL;
 
 	format = pmu_find_format(formats, term->config);
@@ -246,6 +246,11 @@
 		return -EINVAL;
 	}
 
+	/*
+	 * XXX If we ever decide to go with string values for
+	 * non-hardcoded terms, here's the place to translate
+	 * them into a value.
+	 */
 	*vp |= pmu_format_value(format->bits, term->val.num);
 	return 0;
 }
@@ -324,49 +329,58 @@
 /* Simulated user input. */
 static struct parse_events__term test_terms[] = {
 	{
-		.config  = (char *) "krava01",
-		.val.num = 15,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava01",
+		.val.num   = 15,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava02",
-		.val.num = 170,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava02",
+		.val.num   = 170,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava03",
-		.val.num = 1,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava03",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava11",
-		.val.num = 27,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava11",
+		.val.num   = 27,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava12",
-		.val.num = 1,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava12",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava13",
-		.val.num = 2,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava13",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava21",
-		.val.num = 119,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava21",
+		.val.num   = 119,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava22",
-		.val.num = 11,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava22",
+		.val.num   = 11,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 	{
-		.config  = (char *) "krava23",
-		.val.num = 2,
-		.type    = PARSE_EVENTS__TERM_TYPE_NUM,
+		.config    = (char *) "krava23",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 };
 #define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1efd3be..4dcc8f3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1108,16 +1108,10 @@
 	}
 
 	if ((skip = perf_session__process_event(self, &event, tool, head)) < 0) {
-		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
-			    head, event.header.size, event.header.type);
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+		       head, event.header.size, event.header.type);
+		err = -EINVAL;
+		goto out_err;
 	}
 
 	head += size;
@@ -1226,17 +1220,11 @@
 
 	if (size == 0 ||
 	    perf_session__process_event(session, event, tool, file_pos) < 0) {
-		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
-			    file_offset + head, event->header.size,
-			    event->header.type);
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+		       file_offset + head, event->header.size,
+		       event->header.type);
+		err = -EINVAL;
+		goto out_err;
 	}
 
 	head += size;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ac49ef2..1f00388 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -65,6 +65,11 @@
 
 void symbol__delete(struct symbol *sym);
 
+static inline size_t symbol__size(const struct symbol *sym)
+{
+	return sym->end - sym->start + 1;
+}
+
 struct strlist;
 
 struct symbol_conf {
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
new file mode 100644
index 0000000..1064d5b
--- /dev/null
+++ b/tools/perf/util/target.c
@@ -0,0 +1,142 @@
+/*
+ * Helper functions for handling target threads/cpus
+ *
+ * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com>
+ *
+ * Released under the GPL v2.
+ */
+
+#include "target.h"
+#include "debug.h"
+
+#include <pwd.h>
+#include <string.h>
+
+
+enum perf_target_errno perf_target__validate(struct perf_target *target)
+{
+	enum perf_target_errno ret = PERF_ERRNO_TARGET__SUCCESS;
+
+	if (target->pid)
+		target->tid = target->pid;
+
+	/* CPU and PID are mutually exclusive */
+	if (target->tid && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (ret == PERF_ERRNO_TARGET__SUCCESS)
+			ret = PERF_ERRNO_TARGET__PID_OVERRIDE_CPU;
+	}
+
+	/* UID and PID are mutually exclusive */
+	if (target->tid && target->uid_str) {
+		target->uid_str = NULL;
+		if (ret == PERF_ERRNO_TARGET__SUCCESS)
+			ret = PERF_ERRNO_TARGET__PID_OVERRIDE_UID;
+	}
+
+	/* UID and CPU are mutually exclusive */
+	if (target->uid_str && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (ret == PERF_ERRNO_TARGET__SUCCESS)
+			ret = PERF_ERRNO_TARGET__UID_OVERRIDE_CPU;
+	}
+
+	/* PID and SYSTEM are mutually exclusive */
+	if (target->tid && target->system_wide) {
+		target->system_wide = false;
+		if (ret == PERF_ERRNO_TARGET__SUCCESS)
+			ret = PERF_ERRNO_TARGET__PID_OVERRIDE_SYSTEM;
+	}
+
+	/* UID and SYSTEM are mutually exclusive */
+	if (target->uid_str && target->system_wide) {
+		target->system_wide = false;
+		if (ret == PERF_ERRNO_TARGET__SUCCESS)
+			ret = PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM;
+	}
+
+	return ret;
+}
+
+enum perf_target_errno perf_target__parse_uid(struct perf_target *target)
+{
+	struct passwd pwd, *result;
+	char buf[1024];
+	const char *str = target->uid_str;
+
+	target->uid = UINT_MAX;
+	if (str == NULL)
+		return PERF_ERRNO_TARGET__SUCCESS;
+
+	/* Try user name first */
+	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+	if (result == NULL) {
+		/*
+		 * The user name was not found. Maybe it's a UID number.
+		 */
+		char *endptr;
+		int uid = strtol(str, &endptr, 10);
+
+		if (*endptr != '\0')
+			return PERF_ERRNO_TARGET__INVALID_UID;
+
+		getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+		if (result == NULL)
+			return PERF_ERRNO_TARGET__USER_NOT_FOUND;
+	}
+
+	target->uid = result->pw_uid;
+	return PERF_ERRNO_TARGET__SUCCESS;
+}
+
+/*
+ * This must have the same ordering as enum perf_target_errno.
+ */
+static const char *perf_target__error_str[] = {
+	"PID/TID switch overriding CPU",
+	"PID/TID switch overriding UID",
+	"UID switch overriding CPU",
+	"PID/TID switch overriding SYSTEM",
+	"UID switch overriding SYSTEM",
+	"Invalid User: %s",
+	"Problems obtaining information for user %s",
+};
+
+int perf_target__strerror(struct perf_target *target, int errnum,
+			  char *buf, size_t buflen)
+{
+	int idx;
+	const char *msg;
+
+	if (errnum >= 0) {
+		strerror_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	if (errnum <  __PERF_ERRNO_TARGET__START ||
+	    errnum >= __PERF_ERRNO_TARGET__END)
+		return -1;
+
+	idx = errnum - __PERF_ERRNO_TARGET__START;
+	msg = perf_target__error_str[idx];
+
+	switch (errnum) {
+	case PERF_ERRNO_TARGET__PID_OVERRIDE_CPU
+	 ... PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM:
+		snprintf(buf, buflen, "%s", msg);
+		break;
+
+	case PERF_ERRNO_TARGET__INVALID_UID:
+	case PERF_ERRNO_TARGET__USER_NOT_FOUND:
+		snprintf(buf, buflen, msg, target->uid_str);
+		break;
+
+	default:
+		/* cannot reach here */
+		break;
+	}
+
+	return 0;
+}
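
perf_target__strerror() keys on the sign of errnum: non-negative values
are plain errno and go to strerror_r(), while the tool-private codes
live in a disjoint negative range and index the message table. A
cut-down sketch of the same dispatch, with a toy error space rather
than perf's actual codes and strings:

	#include <stdio.h>
	#include <string.h>

	enum {
		MY_ERRNO__START = -10000,	/* disjoint from positive errno */
		MY_ERRNO__PID_OVERRIDE_CPU = MY_ERRNO__START,
		MY_ERRNO__INVALID_UID,
		MY_ERRNO__END,
	};

	/* Must stay in the same order as the enum above. */
	static const char *my_error_str[] = {
		"PID/TID switch overriding CPU",
		"Invalid user: %s",
	};

	static int my_strerror(int errnum, const char *arg, char *buf,
			       size_t buflen)
	{
		if (errnum >= 0) {
			strerror_r(errnum, buf, buflen);	/* plain errno */
			return 0;
		}
		if (errnum < MY_ERRNO__START || errnum >= MY_ERRNO__END)
			return -1;

		snprintf(buf, buflen, my_error_str[errnum - MY_ERRNO__START], arg);
		return 0;
	}

	int main(void)
	{
		char buf[128];

		my_strerror(MY_ERRNO__INVALID_UID, "nosuchuser", buf, sizeof(buf));
		printf("%s\n", buf);
		return 0;
	}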
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
new file mode 100644
index 0000000..a4be857
--- /dev/null
+++ b/tools/perf/util/target.h
@@ -0,0 +1,65 @@
+#ifndef _PERF_TARGET_H
+#define _PERF_TARGET_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct perf_target {
+	const char   *pid;
+	const char   *tid;
+	const char   *cpu_list;
+	const char   *uid_str;
+	uid_t	     uid;
+	bool	     system_wide;
+	bool	     uses_mmap;
+};
+
+enum perf_target_errno {
+	PERF_ERRNO_TARGET__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrarily large negative number so as not to clash
+	 * with standard errno values, since SUS requires errno to have
+	 * distinct positive values. See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__PERF_ERRNO_TARGET__START		= -10000,
+
+
+	/* for perf_target__validate() */
+	PERF_ERRNO_TARGET__PID_OVERRIDE_CPU	= __PERF_ERRNO_TARGET__START,
+	PERF_ERRNO_TARGET__PID_OVERRIDE_UID,
+	PERF_ERRNO_TARGET__UID_OVERRIDE_CPU,
+	PERF_ERRNO_TARGET__PID_OVERRIDE_SYSTEM,
+	PERF_ERRNO_TARGET__UID_OVERRIDE_SYSTEM,
+
+	/* for perf_target__parse_uid() */
+	PERF_ERRNO_TARGET__INVALID_UID,
+	PERF_ERRNO_TARGET__USER_NOT_FOUND,
+
+	__PERF_ERRNO_TARGET__END,
+};
+
+enum perf_target_errno perf_target__validate(struct perf_target *target);
+enum perf_target_errno perf_target__parse_uid(struct perf_target *target);
+
+int perf_target__strerror(struct perf_target *target, int errnum, char *buf,
+			  size_t buflen);
+
+static inline bool perf_target__has_task(struct perf_target *target)
+{
+	return target->tid || target->pid || target->uid_str;
+}
+
+static inline bool perf_target__has_cpu(struct perf_target *target)
+{
+	return target->system_wide || target->cpu_list;
+}
+
+static inline bool perf_target__none(struct perf_target *target)
+{
+	return !perf_target__has_task(target) && !perf_target__has_cpu(target);
+}
+
+#endif /* _PERF_TARGET_H */
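
These inline predicates give every call site one vocabulary for the
three targeting modes; the evlist.c hunk above, for instance, uses them
to pick a dummy cpu map for per-task targets. A small sketch of how a
caller branches on them, with the struct and helpers cut down to toy
form:

	#include <stdbool.h>
	#include <stdio.h>

	/* Cut-down copy of struct perf_target and its predicates. */
	struct perf_target {
		const char *pid, *tid, *cpu_list, *uid_str;
		bool system_wide;
	};

	static bool has_task(struct perf_target *t)
	{
		return t->tid || t->pid || t->uid_str;
	}

	static bool has_cpu(struct perf_target *t)
	{
		return t->system_wide || t->cpu_list;
	}

	static bool none(struct perf_target *t)
	{
		return !has_task(t) && !has_cpu(t);
	}

	int main(void)
	{
		struct perf_target t = { .pid = "1234" };

		if (has_task(&t))
			printf("per-task: use a dummy cpu map\n");
		else if (has_cpu(&t))
			printf("per-cpu: build a real cpu map\n");
		else if (none(&t))
			printf("no target: profile the spawned workload\n");
		return 0;
	}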
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 7da80f1..f718df8 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -6,7 +6,7 @@
 
 struct thread_map {
 	int nr;
-	int map[];
+	pid_t map[];
 };
 
 struct thread_map *thread_map__new_by_pid(pid_t pid);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 09fe579..abe0e8e 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -69,23 +69,24 @@
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-	if (top->target_pid)
+	if (top->target.pid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
-				top->target_pid);
-	else if (top->target_tid)
+				top->target.pid);
+	else if (top->target.tid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
-				top->target_tid);
-	else if (top->uid_str != NULL)
+				top->target.tid);
+	else if (top->target.uid_str != NULL)
 		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
-				top->uid_str);
+				top->target.uid_str);
 	else
 		ret += SNPRINTF(bf + ret, size - ret, " (all");
 
-	if (top->cpu_list)
+	if (top->target.cpu_list)
 		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
-				top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
+				top->evlist->cpus->nr > 1 ? "s" : "",
+				top->target.cpu_list);
 	else {
-		if (top->target_tid)
+		if (top->target.tid)
 			ret += SNPRINTF(bf + ret, size - ret, ")");
 		else
 			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index ce61cb2..33347ca 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -13,6 +13,7 @@
 struct perf_top {
 	struct perf_tool   tool;
 	struct perf_evlist *evlist;
+	struct perf_target target;
 	/*
 	 * Symbols will be added here in perf_event__process_sample and will
 	 * get out after decayed.
@@ -23,10 +24,7 @@
 	u64		   guest_us_samples, guest_kernel_samples;
 	int		   print_entries, count_filter, delay_secs;
 	int		   freq;
-	const char	   *target_pid, *target_tid;
-	uid_t		   uid;
 	bool		   hide_kernel_symbols, hide_user_symbols, zero;
-	bool		   system_wide;
 	bool		   use_tui, use_stdio;
 	bool		   sort_has_symbols;
 	bool		   dont_use_callchains;
@@ -37,7 +35,6 @@
 	bool		   sample_id_all_missing;
 	bool		   exclude_guest_missing;
 	bool		   dump_symtab;
-	const char	   *cpu_list;
 	struct hist_entry  *sym_filter_entry;
 	struct perf_evsel  *sym_evsel;
 	struct perf_session *session;
@@ -47,7 +44,6 @@
 	int		   realtime_prio;
 	int		   sym_pcnt_filter;
 	const char	   *sym_filter;
-	const char	   *uid_str;
 };
 
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
deleted file mode 100644
index 57a4c6e..0000000
--- a/tools/perf/util/ui/browsers/annotate.c
+++ /dev/null
@@ -1,433 +0,0 @@
-#include "../../util.h"
-#include "../browser.h"
-#include "../helpline.h"
-#include "../libslang.h"
-#include "../ui.h"
-#include "../util.h"
-#include "../../annotate.h"
-#include "../../hist.h"
-#include "../../sort.h"
-#include "../../symbol.h"
-#include <pthread.h>
-#include <newt.h>
-
-struct annotate_browser {
-	struct ui_browser b;
-	struct rb_root	  entries;
-	struct rb_node	  *curr_hot;
-	struct objdump_line *selection;
-	int		    nr_asm_entries;
-	int		    nr_entries;
-	bool		    hide_src_code;
-};
-
-struct objdump_line_rb_node {
-	struct rb_node	rb_node;
-	double		percent;
-	u32		idx;
-	int		idx_asm;
-};
-
-static inline
-struct objdump_line_rb_node *objdump_line__rb(struct objdump_line *self)
-{
-	return (struct objdump_line_rb_node *)(self + 1);
-}
-
-static bool objdump_line__filter(struct ui_browser *browser, void *entry)
-{
-	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
-
-	if (ab->hide_src_code) {
-		struct objdump_line *ol = list_entry(entry, struct objdump_line, node);
-		return ol->offset == -1;
-	}
-
-	return false;
-}
-
-static void annotate_browser__write(struct ui_browser *self, void *entry, int row)
-{
-	struct annotate_browser *ab = container_of(self, struct annotate_browser, b);
-	struct objdump_line *ol = list_entry(entry, struct objdump_line, node);
-	bool current_entry = ui_browser__is_current_entry(self, row);
-	int width = self->width;
-
-	if (ol->offset != -1) {
-		struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
-		ui_browser__set_percent_color(self, olrb->percent, current_entry);
-		slsmg_printf(" %7.2f ", olrb->percent);
-	} else {
-		ui_browser__set_percent_color(self, 0, current_entry);
-		slsmg_write_nstring(" ", 9);
-	}
-
-	SLsmg_write_char(':');
-	slsmg_write_nstring(" ", 8);
-
-	/* The scroll bar isn't being used */
-	if (!self->navkeypressed)
-		width += 1;
-
-	if (!ab->hide_src_code && ol->offset != -1)
-		if (!current_entry || (self->use_navkeypressed &&
-				       !self->navkeypressed))
-			ui_browser__set_color(self, HE_COLORSET_CODE);
-
-	if (!*ol->line)
-		slsmg_write_nstring(" ", width - 18);
-	else
-		slsmg_write_nstring(ol->line, width - 18);
-
-	if (current_entry)
-		ab->selection = ol;
-}
-
-static double objdump_line__calc_percent(struct objdump_line *self,
-					 struct symbol *sym, int evidx)
-{
-	double percent = 0.0;
-
-	if (self->offset != -1) {
-		int len = sym->end - sym->start;
-		unsigned int hits = 0;
-		struct annotation *notes = symbol__annotation(sym);
-		struct source_line *src_line = notes->src->lines;
-		struct sym_hist *h = annotation__histogram(notes, evidx);
-		s64 offset = self->offset;
-		struct objdump_line *next;
-
-		next = objdump__get_next_ip_line(&notes->src->source, self);
-		while (offset < (s64)len &&
-		       (next == NULL || offset < next->offset)) {
-			if (src_line) {
-				percent += src_line[offset].percent;
-			} else
-				hits += h->addr[offset];
-
-			++offset;
-		}
-		/*
- 		 * If the percentage wasn't already calculated in
- 		 * symbol__get_source_line, do it now:
- 		 */
-		if (src_line == NULL && h->sum)
-			percent = 100.0 * hits / h->sum;
-	}
-
-	return percent;
-}
-
-static void objdump__insert_line(struct rb_root *self,
-				 struct objdump_line_rb_node *line)
-{
-	struct rb_node **p = &self->rb_node;
-	struct rb_node *parent = NULL;
-	struct objdump_line_rb_node *l;
-
-	while (*p != NULL) {
-		parent = *p;
-		l = rb_entry(parent, struct objdump_line_rb_node, rb_node);
-		if (line->percent < l->percent)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&line->rb_node, parent, p);
-	rb_insert_color(&line->rb_node, self);
-}
-
-static void annotate_browser__set_top(struct annotate_browser *self,
-				      struct rb_node *nd)
-{
-	struct objdump_line_rb_node *rbpos;
-	struct objdump_line *pos;
-	unsigned back;
-
-	ui_browser__refresh_dimensions(&self->b);
-	back = self->b.height / 2;
-	rbpos = rb_entry(nd, struct objdump_line_rb_node, rb_node);
-	pos = ((struct objdump_line *)rbpos) - 1;
-	self->b.top_idx = self->b.index = rbpos->idx;
-
-	while (self->b.top_idx != 0 && back != 0) {
-		pos = list_entry(pos->node.prev, struct objdump_line, node);
-
-		--self->b.top_idx;
-		--back;
-	}
-
-	self->b.top = pos;
-	self->curr_hot = nd;
-}
-
-static void annotate_browser__calc_percent(struct annotate_browser *browser,
-					   int evidx)
-{
-	struct map_symbol *ms = browser->b.priv;
-	struct symbol *sym = ms->sym;
-	struct annotation *notes = symbol__annotation(sym);
-	struct objdump_line *pos;
-
-	browser->entries = RB_ROOT;
-
-	pthread_mutex_lock(&notes->lock);
-
-	list_for_each_entry(pos, &notes->src->source, node) {
-		struct objdump_line_rb_node *rbpos = objdump_line__rb(pos);
-		rbpos->percent = objdump_line__calc_percent(pos, sym, evidx);
-		if (rbpos->percent < 0.01) {
-			RB_CLEAR_NODE(&rbpos->rb_node);
-			continue;
-		}
-		objdump__insert_line(&browser->entries, rbpos);
-	}
-	pthread_mutex_unlock(&notes->lock);
-
-	browser->curr_hot = rb_last(&browser->entries);
-}
-
-static bool annotate_browser__toggle_source(struct annotate_browser *browser)
-{
-	struct objdump_line *ol;
-	struct objdump_line_rb_node *olrb;
-	off_t offset = browser->b.index - browser->b.top_idx;
-
-	browser->b.seek(&browser->b, offset, SEEK_CUR);
-	ol = list_entry(browser->b.top, struct objdump_line, node);
-	olrb = objdump_line__rb(ol);
-
-	if (browser->hide_src_code) {
-		if (olrb->idx_asm < offset)
-			offset = olrb->idx;
-
-		browser->b.nr_entries = browser->nr_entries;
-		browser->hide_src_code = false;
-		browser->b.seek(&browser->b, -offset, SEEK_CUR);
-		browser->b.top_idx = olrb->idx - offset;
-		browser->b.index = olrb->idx;
-	} else {
-		if (olrb->idx_asm < 0) {
-			ui_helpline__puts("Only available for assembly lines.");
-			browser->b.seek(&browser->b, -offset, SEEK_CUR);
-			return false;
-		}
-
-		if (olrb->idx_asm < offset)
-			offset = olrb->idx_asm;
-
-		browser->b.nr_entries = browser->nr_asm_entries;
-		browser->hide_src_code = true;
-		browser->b.seek(&browser->b, -offset, SEEK_CUR);
-		browser->b.top_idx = olrb->idx_asm - offset;
-		browser->b.index = olrb->idx_asm;
-	}
-
-	return true;
-}
-
-static int annotate_browser__run(struct annotate_browser *self, int evidx,
-				 void(*timer)(void *arg),
-				 void *arg, int delay_secs)
-{
-	struct rb_node *nd = NULL;
-	struct map_symbol *ms = self->b.priv;
-	struct symbol *sym = ms->sym;
-	const char *help = "<-/ESC: Exit, TAB/shift+TAB: Cycle hot lines, "
-			   "H: Go to hottest line, ->/ENTER: Line action, "
-			   "S: Toggle source code view";
-	int key;
-
-	if (ui_browser__show(&self->b, sym->name, help) < 0)
-		return -1;
-
-	annotate_browser__calc_percent(self, evidx);
-
-	if (self->curr_hot)
-		annotate_browser__set_top(self, self->curr_hot);
-
-	nd = self->curr_hot;
-
-	while (1) {
-		key = ui_browser__run(&self->b, delay_secs);
-
-		if (delay_secs != 0) {
-			annotate_browser__calc_percent(self, evidx);
-			/*
-			 * Current line focus got out of the list of most active
-			 * lines, NULL it so that if TAB|UNTAB is pressed, we
-			 * move to curr_hot (current hottest line).
-			 */
-			if (nd != NULL && RB_EMPTY_NODE(nd))
-				nd = NULL;
-		}
-
-		switch (key) {
-		case K_TIMER:
-			if (timer != NULL)
-				timer(arg);
-
-			if (delay_secs != 0)
-				symbol__annotate_decay_histogram(sym, evidx);
-			continue;
-		case K_TAB:
-			if (nd != NULL) {
-				nd = rb_prev(nd);
-				if (nd == NULL)
-					nd = rb_last(&self->entries);
-			} else
-				nd = self->curr_hot;
-			break;
-		case K_UNTAB:
-			if (nd != NULL)
-				nd = rb_next(nd);
-				if (nd == NULL)
-					nd = rb_first(&self->entries);
-			else
-				nd = self->curr_hot;
-			break;
-		case 'H':
-		case 'h':
-			nd = self->curr_hot;
-			break;
-		case 'S':
-		case 's':
-			if (annotate_browser__toggle_source(self))
-				ui_helpline__puts(help);
-			continue;
-		case K_ENTER:
-		case K_RIGHT:
-			if (self->selection == NULL) {
-				ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org");
-				continue;
-			}
-
-			if (self->selection->offset == -1) {
-				ui_helpline__puts("Actions are only available for assembly lines.");
-				continue;
-			} else {
-				char *s = strstr(self->selection->line, "callq ");
-				struct annotation *notes;
-				struct symbol *target;
-				u64 ip;
-
-				if (s == NULL) {
-					ui_helpline__puts("Actions are only available for the 'callq' instruction.");
-					continue;
-				}
-
-				s = strchr(s, ' ');
-				if (s++ == NULL) {
-					ui_helpline__puts("Invallid callq instruction.");
-					continue;
-				}
-
-				ip = strtoull(s, NULL, 16);
-				ip = ms->map->map_ip(ms->map, ip);
-				target = map__find_symbol(ms->map, ip, NULL);
-				if (target == NULL) {
-					ui_helpline__puts("The called function was not found.");
-					continue;
-				}
-
-				notes = symbol__annotation(target);
-				pthread_mutex_lock(&notes->lock);
-
-				if (notes->src == NULL && symbol__alloc_hist(target) < 0) {
-					pthread_mutex_unlock(&notes->lock);
-					ui__warning("Not enough memory for annotating '%s' symbol!\n",
-						    target->name);
-					continue;
-				}
-
-				pthread_mutex_unlock(&notes->lock);
-				symbol__tui_annotate(target, ms->map, evidx,
-						     timer, arg, delay_secs);
-				ui_browser__show_title(&self->b, sym->name);
-			}
-			continue;
-		case K_LEFT:
-		case K_ESC:
-		case 'q':
-		case CTRL('c'):
-			goto out;
-		default:
-			continue;
-		}
-
-		if (nd != NULL)
-			annotate_browser__set_top(self, nd);
-	}
-out:
-	ui_browser__hide(&self->b);
-	return key;
-}
-
-int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
-			     void(*timer)(void *arg), void *arg, int delay_secs)
-{
-	return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx,
-				    timer, arg, delay_secs);
-}
-
-int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
-			 void(*timer)(void *arg), void *arg,
-			 int delay_secs)
-{
-	struct objdump_line *pos, *n;
-	struct annotation *notes;
-	struct map_symbol ms = {
-		.map = map,
-		.sym = sym,
-	};
-	struct annotate_browser browser = {
-		.b = {
-			.refresh = ui_browser__list_head_refresh,
-			.seek	 = ui_browser__list_head_seek,
-			.write	 = annotate_browser__write,
-			.filter  = objdump_line__filter,
-			.priv	 = &ms,
-			.use_navkeypressed = true,
-		},
-	};
-	int ret;
-
-	if (sym == NULL)
-		return -1;
-
-	if (map->dso->annotate_warned)
-		return -1;
-
-	if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) {
-		ui__error("%s", ui_helpline__last_msg);
-		return -1;
-	}
-
-	ui_helpline__push("Press <- or ESC to exit");
-
-	notes = symbol__annotation(sym);
-
-	list_for_each_entry(pos, &notes->src->source, node) {
-		struct objdump_line_rb_node *rbpos;
-		size_t line_len = strlen(pos->line);
-
-		if (browser.b.width < line_len)
-			browser.b.width = line_len;
-		rbpos = objdump_line__rb(pos);
-		rbpos->idx = browser.nr_entries++;
-		if (pos->offset != -1)
-			rbpos->idx_asm = browser.nr_asm_entries++;
-		else
-			rbpos->idx_asm = -1;
-	}
-
-	browser.b.nr_entries = browser.nr_entries;
-	browser.b.entries = &notes->src->source,
-	browser.b.width += 18; /* Percentage */
-	ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs);
-	list_for_each_entry_safe(pos, n, &notes->src->source, node) {
-		list_del(&pos->node);
-		objdump_line__free(pos);
-	}
-	return ret;
-}
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 52bb07c..4007aca 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -82,41 +82,3 @@
 	warn_routine(warn, params);
 	va_end(params);
 }
-
-uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
-{
-	struct passwd pwd, *result;
-	char buf[1024];
-
-	if (str == NULL)
-		return UINT_MAX;
-
-	/* UID and PID are mutually exclusive */
-	if (tid || pid) {
-		ui__warning("PID/TID switch overriding UID\n");
-		sleep(1);
-		return UINT_MAX;
-	}
-
-	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
-
-	if (result == NULL) {
-		char *endptr;
-		int uid = strtol(str, &endptr, 10);
-
-		if (*endptr != '\0') {
-			ui__error("Invalid user %s\n", str);
-			return UINT_MAX - 1;
-		}
-
-		getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
-
-		if (result == NULL) {
-			ui__error("Problems obtaining information for user %s\n",
-				  str);
-			return UINT_MAX - 1;
-		}
-	}
-
-	return result->pw_uid;
-}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 8109a90..d03599f 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -148,3 +148,13 @@
 
 	return buf - buf_start;
 }
+
+size_t hex_width(u64 v)
+{
+	size_t n = 1;
+
+	while ((v >>= 4))
+		++n;
+
+	return n;
+}
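
hex_width() counts nibbles so callers can right-size printf '%*' field
widths when aligning addresses. For example, padding offsets to the
width of the largest address in a listing:

	#include <inttypes.h>
	#include <stdio.h>

	static size_t hex_width(uint64_t v)
	{
		size_t n = 1;

		while ((v >>= 4))
			++n;

		return n;
	}

	int main(void)
	{
		uint64_t max_addr = 0x4005a0;

		/* Both lines come out right-aligned to 6 hex digits. */
		printf("%*" PRIx64 ":\n", (int)hex_width(max_addr), (uint64_t)0x10);
		printf("%*" PRIx64 ":\n", (int)hex_width(max_addr), max_addr);
		return 0;
	}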
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 0f99f39..2daaedb 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -74,7 +74,6 @@
 #include <netinet/tcp.h>
 #include <arpa/inet.h>
 #include <netdb.h>
-#include <pwd.h>
 #include <inttypes.h>
 #include "../../../include/linux/magic.h"
 #include "types.h"
@@ -249,8 +248,6 @@
 
 void event_attr_init(struct perf_event_attr *attr);
 
-uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
-
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
@@ -265,4 +262,6 @@
 	return (n != 0 && ((n & (n - 1)) == 0));
 }
 
+size_t hex_width(u64 v);
+
 #endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
new file mode 100644
index 0000000..87b55a7
--- /dev/null
+++ b/tools/scripts/Makefile.include
@@ -0,0 +1,57 @@
+ifeq ("$(origin O)", "command line")
+	OUTPUT := $(O)/
+endif
+
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+#
+# Include saner warnings here, which can catch bugs:
+#
+EXTRA_WARNINGS := -Wbad-function-cast
+EXTRA_WARNINGS += -Wdeclaration-after-statement
+EXTRA_WARNINGS += -Wformat-security
+EXTRA_WARNINGS += -Wformat-y2k
+EXTRA_WARNINGS += -Winit-self
+EXTRA_WARNINGS += -Wmissing-declarations
+EXTRA_WARNINGS += -Wmissing-prototypes
+EXTRA_WARNINGS += -Wnested-externs
+EXTRA_WARNINGS += -Wno-system-headers
+EXTRA_WARNINGS += -Wold-style-definition
+EXTRA_WARNINGS += -Wpacked
+EXTRA_WARNINGS += -Wredundant-decls
+EXTRA_WARNINGS += -Wshadow
+EXTRA_WARNINGS += -Wstrict-aliasing=3
+EXTRA_WARNINGS += -Wstrict-prototypes
+EXTRA_WARNINGS += -Wswitch-default
+EXTRA_WARNINGS += -Wswitch-enum
+EXTRA_WARNINGS += -Wundef
+EXTRA_WARNINGS += -Wwrite-strings
+EXTRA_WARNINGS += -Wformat
+
+ifneq ($(findstring $(MAKEFLAGS), w),w)
+PRINT_DIR = --no-print-directory
+else
+NO_SUBDIR = :
+endif
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_AR       = @echo '   ' AR $@;
+	QUIET_LINK     = @echo '   ' LINK $@;
+	QUIET_MKDIR    = @echo '   ' MKDIR $@;
+	QUIET_GEN      = @echo '   ' GEN $@;
+	QUIET_SUBDIR0  = +@subdir=
+	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			 $(MAKE) $(PRINT_DIR) -C $$subdir
+	QUIET_FLEX     = @echo '   ' FLEX $@;
+	QUIET_BISON    = @echo '   ' BISON $@;
+endif
+endif