Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Fix checksumming regressions, from Tom Herbert.

 2) Undo unintentional permissions changes for SCTP rto_alpha and
    rto_beta sysctl knobs, from Daniel Borkmann.

 3) VXLAN, like other IP tunnels, should advertise its encapsulation
    size using dev->needed_headroom instead of dev->hard_header_len.
    From Cong Wang; a brief sketch of the idea follows the shortlog below.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  net: sctp: fix permissions for rto_alpha and rto_beta knobs
  vxlan: Checksum fixes
  net: add skb_pop_rcv_encapsulation
  udp: call __skb_checksum_complete when doing full checksum
  net: Fix save software checksum complete
  net: Fix GSO constants to match NETIF flags
  udp: ipv4: do not waste time in __udp4_lib_mcast_demux_lookup
  vxlan: use dev->needed_headroom instead of dev->hard_header_len
  MAINTAINERS: update cxgb4 maintainer
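The distinction in item 3 matters because dev->hard_header_len describes a
link-layer header that the device itself writes, while dev->needed_headroom
only asks upper layers to reserve extra skb headroom for encapsulation. Below
is a minimal sketch of the idea for a hypothetical tunnel driver; the helper
name and the overhead arithmetic are illustrative, not the exact vxlan.c
change:

	#include <linux/ip.h>
	#include <linux/netdevice.h>
	#include <linux/udp.h>

	#define EXAMPLE_VXLAN_HLEN 8	/* the VXLAN header is 8 bytes */

	/* Reserve the IPv4 + UDP + VXLAN encapsulation overhead as headroom
	 * instead of adding it to hard_header_len, which would wrongly claim
	 * that the device writes a link-layer header of that size itself.
	 */
	static void example_tunnel_setup(struct net_device *dev)
	{
		unsigned int overhead = sizeof(struct iphdr) +
					sizeof(struct udphdr) +
					EXAMPLE_VXLAN_HLEN;

		dev->needed_headroom = LL_MAX_HEADER + overhead;
	}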
diff --git a/.gitignore b/.gitignore
index 42fa0d5..f4c0b09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,7 +22,6 @@
 *.lst
 *.symtypes
 *.order
-modules.builtin
 *.elf
 *.bin
 *.gz
@@ -33,6 +32,8 @@
 *.lzo
 *.patch
 *.gcno
+modules.builtin
+Module.symvers
 
 #
 # Top-level generic files
@@ -44,7 +45,6 @@
 /vmlinuz
 /System.map
 /Module.markers
-/Module.symvers
 
 #
 # Debian directory (make deb-pkg)
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
index a5160d8..b9ec668 100644
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt
@@ -20,12 +20,15 @@
 	"allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
 	"allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
 	"allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20
+	"allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31
 	"allwinner,sun6i-a31-ahb1-mux-clk" - for the AHB1 multiplexer on A31
 	"allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
 	"allwinner,sun4i-a10-apb0-clk" - for the APB0 clock
+	"allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
 	"allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
 	"allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
 	"allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
+	"allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
 	"allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
 	"allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
 	"allwinner,sun4i-a10-apb1-mux-clk" - for the APB1 clock muxing
@@ -41,6 +44,7 @@
 	"allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31
 	"allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20
 	"allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13
+	"allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31
 
 Required properties for all clocks:
 - reg : shall be the control register address for the clock.
diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index 7faf5a6..ade4dd4 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -14,18 +14,32 @@
 [2] Documentation/devicetree/bindings/clock/ti/dpll.txt
 
 Required properties:
-- compatible : shall be "ti,dra7-apll-clock"
+- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock"
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks (clk-ref and clk-bypass)
 - reg : address and length of the register set for controlling the APLL.
   It contains the information of registers in the following order:
-	"control" - contains the control register base address
-	"idlest" - contains the idlest register base address
+	"control" - contains the control register offset
+	"idlest" - contains the idlest register offset
+	"autoidle" - contains the autoidle register offset (OMAP2 only)
+- ti,clock-frequency : static clock frequency for the clock (OMAP2 only)
+- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only)
+- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only)
 
 Examples:
-	apll_pcie_ck: apll_pcie_ck@4a008200 {
+	apll_pcie_ck: apll_pcie_ck {
 		#clock-cells = <0>;
 		clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>;
-		reg = <0x4a00821c 0x4>, <0x4a008220 0x4>;
+		reg = <0x021c>, <0x0220>;
 		compatible = "ti,dra7-apll-clock";
 	};
+
+	apll96_ck: apll96_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-apll-clock";
+		clocks = <&sys_ck>;
+		ti,bit-shift = <2>;
+		ti,idlest-shift = <8>;
+		ti,clock-frequency = <96000000>;
+		reg = <0x0500>, <0x0530>, <0x0520>;
+	};
diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index 30bfdb7..df57009 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -24,12 +24,14 @@
 		"ti,omap4-dpll-core-clock",
 		"ti,omap4-dpll-m4xen-clock",
 		"ti,omap4-dpll-j-type-clock",
+		"ti,omap5-mpu-dpll-clock",
 		"ti,am3-dpll-no-gate-clock",
 		"ti,am3-dpll-j-type-clock",
 		"ti,am3-dpll-no-gate-j-type-clock",
 		"ti,am3-dpll-clock",
 		"ti,am3-dpll-core-clock",
 		"ti,am3-dpll-x2-clock",
+		"ti,omap2-dpll-core-clock",
 
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks, first entry lists reference clock
@@ -41,6 +43,7 @@
 	"mult-div1" - contains the multiplier / divider register base address
 	"autoidle" - contains the autoidle register base address (optional)
   ti,am3-* dpll types do not have autoidle register
+  ti,omap2-* dpll type does not support idlest / autoidle registers
 
 Optional properties:
 - DPLL mode setting - defining any one or more of the following overrides
@@ -73,3 +76,10 @@
 		clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
 		reg = <0x90>, <0x5c>, <0x68>;
 	};
+
+	dpll_ck: dpll_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-dpll-core-clock";
+		clocks = <&sys_ck>, <&sys_ck>;
+		reg = <0x0500>, <0x0540>;
+	};
diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
new file mode 100644
index 0000000..585e8c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
@@ -0,0 +1,96 @@
+Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC.
+
+The ATL IP is used to generate clocks used to synchronize the baseband and
+audio codec. A single ATL IP provides four ATL clock instances sharing the same
+functional clock, but each can be configured to provide a different clock.
+ATL can maintain a clock averaged to some desired frequency based on the bws/aws
+signals and can compensate for the drift between the two ws signals.
+
+In order to provide support for ATL and its output clocks (which can be used
+internally within the SoC or by external components) two sets of bindings are needed:
+
+Clock tree binding:
+This binding uses the common clock binding[1].
+It makes it possible to integrate the ATL clocks with the DT clock tree and
+provides a CCF-level representation of the ATL clocks to be used by drivers.
+Since the clock instances are part of a single IP, this binding serves as a
+node for the DT clock tree; the IP driver is needed to handle the actual
+configuration of the IP.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,dra7-atl-clock"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles to functional clock of ATL
+
+Binding for the IP driver:
+This binding is used to configure the IP driver, which handles the
+configuration of the IP for the ATL clock instances.
+
+Required properties:
+- compatible : shall be "ti,dra7-atl"
+- reg : base address for the ATL IP
+- ti,provided-clocks : List of phandles to the clocks associated with the ATL
+- clocks : link phandles to functional clock of ATL
+- clock-names : Shall be set to "fck"
+- ti,hwmods : Shall be set to "atl"
+
+Optional properties:
+Configuration of ATL instances:
+- atl{0/1/2/3} {
+	- bws : Baseband word select signal selection
+	- aws : Audio word select signal selection
+};
+
+For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include
+file.
+
+Examples:
+/* clock bindings for atl provided clocks */
+atl_clkin0_ck: atl_clkin0_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin1_ck: atl_clkin1_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin2_ck: atl_clkin2_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin3_ck: atl_clkin3_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+/* binding for the IP */
+atl: atl@4843c000 {
+	compatible = "ti,dra7-atl";
+	reg = <0x4843c000 0x3ff>;
+	ti,hwmods = "atl";
+	ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>,
+				<&atl_clkin2_ck>, <&atl_clkin3_ck>;
+	clocks = <&atl_gfclk_mux>;
+	clock-names = "fck";
+	status = "disabled";
+};
+
+#include <dt-bindings/clk/ti-dra7-atl.h>
+
+&atl {
+	status = "okay";
+
+	atl2 {
+		bws = <DRA7_ATL_WS_MCASP2_FSX>;
+		aws = <DRA7_ATL_WS_MCASP3_FSX>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt
index 125281a..03f8fde 100644
--- a/Documentation/devicetree/bindings/clock/ti/gate.txt
+++ b/Documentation/devicetree/bindings/clock/ti/gate.txt
@@ -25,6 +25,11 @@
 			  to map clockdomains properly
   "ti,hsdiv-gate-clock" - gate clock with OMAP36xx specific hardware handling,
 			  required for a hardware errata
+  "ti,composite-gate-clock" - composite gate clock, to be part of composite
+			      clock
+  "ti,composite-no-wait-gate-clock" - composite gate clock that does not wait
+				      for clock to be active before returning
+				      from clk_enable()
 - #clock-cells : from common clock binding; shall be set to 0
 - clocks : link to phandle of parent clock
 - reg : offset for register controlling adjustable gate, not needed for
@@ -41,7 +46,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
 		clocks = <&core_96m_fck>;
-		reg = <0x48004a00 0x4>;
+		reg = <0x0a00>;
 		ti,bit-shift = <25>;
 	};
 
@@ -57,7 +62,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,dss-gate-clock";
 		clocks = <&dpll4_m4x2_ck>;
-		reg = <0x48004e00 0x4>;
+		reg = <0x0e00>;
 		ti,bit-shift = <0>;
 	};
 
@@ -65,7 +70,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,am35xx-gate-clock";
 		clocks = <&ipss_ick>;
-		reg = <0x4800259c 0x4>;
+		reg = <0x059c>;
 		ti,bit-shift = <1>;
 	};
 
@@ -80,6 +85,22 @@
 		compatible = "ti,hsdiv-gate-clock";
 		clocks = <&dpll4_m2x2_mul_ck>;
 		ti,bit-shift = <0x1b>;
-		reg = <0x48004d00 0x4>;
+		reg = <0x0d00>;
 		ti,set-bit-to-disable;
 	};
+
+	vlynq_gate_fck: vlynq_gate_fck {
+		#clock-cells = <0>;
+		compatible = "ti,composite-gate-clock";
+		clocks = <&core_ck>;
+		ti,bit-shift = <3>;
+		reg = <0x0200>;
+	};
+
+	sys_clkout2_src_gate: sys_clkout2_src_gate {
+		#clock-cells = <0>;
+		compatible = "ti,composite-no-wait-gate-clock";
+		clocks = <&core_ck>;
+		ti,bit-shift = <15>;
+		reg = <0x0070>;
+	};
diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
index 064e8ca..3111a40 100644
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -21,6 +21,8 @@
   "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling
   "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling
   "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling
+  "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW
+				  handling
 - #clock-cells : from common clock binding; shall be set to 0
 - clocks : link to phandle of parent clock
 - reg : base address for the control register
diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1
new file mode 100644
index 0000000..6b1e054
--- /dev/null
+++ b/Documentation/hwmon/shtc1
@@ -0,0 +1,43 @@
+Kernel driver shtc1
+===================
+
+Supported chips:
+  * Sensirion SHTC1
+    Prefix: 'shtc1'
+    Addresses scanned: none
+    Datasheet: http://www.sensirion.com/file/datasheet_shtc1
+
+  * Sensirion SHTW1
+    Prefix: 'shtw1'
+    Addresses scanned: none
+    Datasheet: Not publicly available
+
+Author:
+  Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHTC1 chip, a humidity and
+temperature sensor. Temperature is measured in degrees Celsius; relative
+humidity is expressed as a percentage. The driver can also be used for the
+SHTW1 chip, which has the same electrical interface.
+
+The device communicates using the I2C protocol. All sensors are set to I2C
+address 0x70. See Documentation/i2c/instantiating-devices for methods to
+instantiate the device.
+
+There are two options configurable by means of shtc1_platform_data:
+1. blocking (pull the I2C clock line down while performing the measurement) or
+   non-blocking mode. Blocking mode will guarantee the fastest result but
+   the I2C bus will be busy during that time. By default, non-blocking mode
+   is used. Make sure clock-stretching works properly on your device if you
+   want to use blocking mode.
+2. high or low accuracy. High accuracy is used by default and using it is
+   strongly recommended.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 69372fb..3fb39e0 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -470,7 +470,7 @@
 
 	Sometimes, an external module uses exported symbols from
 	another external module. kbuild needs to have full knowledge of
-	all symbols to avoid spliitting out warnings about undefined
+	all symbols to avoid spitting out warnings about undefined
 	symbols. Three solutions exist for this situation.
 
 	NOTE: The method with a top-level kbuild file is recommended
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 0cfb00f..4bbeca8 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -22,8 +22,9 @@
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address, specifying a handler
+can trap at almost any kernel code address(*), specifying a handler
 routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code cannot be trapped, see 1.5 Blacklist)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -273,6 +274,19 @@
  or
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions that kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. a double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function to the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use the NOKPROBE_SYMBOL() macro
+to specify the blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it if the given address is in the blacklist.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
index 1dfe62c..ee231ed 100644
--- a/Documentation/mutex-design.txt
+++ b/Documentation/mutex-design.txt
@@ -1,139 +1,157 @@
 Generic Mutex Subsystem
 
 started by Ingo Molnar <mingo@redhat.com>
+updated by Davidlohr Bueso <davidlohr@hp.com>
 
-  "Why on earth do we need a new mutex subsystem, and what's wrong
-   with semaphores?"
+What are mutexes?
+-----------------
 
-firstly, there's nothing wrong with semaphores. But if the simpler
-mutex semantics are sufficient for your code, then there are a couple
-of advantages of mutexes:
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
 
- - 'struct mutex' is smaller on most architectures: E.g. on x86,
-   'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
-   A smaller structure size means less RAM footprint, and better
-   CPU-cache utilization.
+[1] http://lwn.net/Articles/164802/
 
- - tighter code. On x86 i get the following .text sizes when
-   switching all mutex-alike semaphores in the kernel to the mutex
-   subsystem:
+Implementation
+--------------
 
-        text    data     bss     dec     hex filename
-     3280380  868188  396860 4545428  455b94 vmlinux-semaphore
-     3255329  865296  396732 4517357  44eded vmlinux-mutex
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three
+state atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
 
-   that's 25051 bytes of code saved, or a 0.76% win - off the hottest
-   codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
-   Smaller code means better icache footprint, which is one of the
-   major optimization goals in the Linux kernel currently.
+	  1: unlocked
+	  0: locked, no waiters
+   negative: locked, with potential waiters
 
- - the mutex subsystem is slightly faster and has better scalability for
-   contended workloads. On an 8-way x86 system, running a mutex-based
-   kernel and testing creat+unlink+close (of separate, per-task files)
-   in /tmp with 16 parallel tasks, the average number of ops/sec is:
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
 
-    Semaphores:                        Mutexes:
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
 
-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    8 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      34713          avg loops/sec:      84153
-    CPU utilization:    63%            CPU utilization:    22%
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:
 
-   i.e. in this workload, the mutex based kernel was 2.4 times faster
-   than the semaphore based kernel, _and_ it also had 2.8 times less CPU
-   utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
-   performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
-   performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
-   more efficient.)
-
-   the scalability difference is visible even on a 2-way P4 HT box:
-
-    Semaphores:                        Mutexes:
-
-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    4 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      127659         avg loops/sec:      181082
-    CPU utilization:    100%           CPU utilization:    34%
-
-   (the straight performance advantage of mutexes is 41%, the per-cycle
-    efficiency of mutexes is 4.1 times better.)
-
- - there are no fastpath tradeoffs, the mutex fastpath is just as tight
-   as the semaphore fastpath. On x86, the locking fastpath is 2
-   instructions:
-
-    c0377ccb <mutex_lock>:
-    c0377ccb:       f0 ff 08                lock decl (%eax)
-    c0377cce:       78 0e                   js     c0377cde <.text..lock.mutex>
-    c0377cd0:       c3                      ret
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
 
    the unlocking fastpath is equally tight:
 
-    c0377cd1 <mutex_unlock>:
-    c0377cd1:       f0 ff 00                lock incl (%eax)
-    c0377cd4:       7e 0f                   jle    c0377ce5 <.text..lock.mutex+0x7>
-    c0377cd6:       c3                      ret
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
 
- - 'struct mutex' semantics are well-defined and are enforced if
-   CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
-   virtually no debugging code or instrumentation. The mutex subsystem
-   checks and enforces the following rules:
 
-   * - only one task can hold the mutex at a time
-   * - only the owner can unlock the mutex
-   * - multiple unlocks are not permitted
-   * - recursive locking is not permitted
-   * - a mutex object must be initialized via the API
-   * - a mutex object must not be initialized via memset or copying
-   * - task may not exit with mutex held
-   * - memory areas where held locks reside must not be freed
-   * - held mutexes must not be reinitialized
-   * - mutexes may not be used in hardware or software interrupt
-   *   contexts such as tasklets and timers
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched). The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using MCS lock so that only
+     one spinner can compete for the mutex.
 
-   furthermore, there are also convenience features in the debugging
-   code:
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     for sleeping lock implementation. An important feature of the customized
+     MCS lock is that it has the extra property that spinners are able to exit
+     the MCS spinlock queue when they need to reschedule. This further helps
+     avoid situations where MCS spinners that need to reschedule would continue
+     waiting to spin on mutex owner, only to go directly to slowpath upon
+     obtaining the MCS lock.
 
-   * - uses symbolic names of mutexes, whenever they are printed in debug output
-   * - point-of-acquire tracking, symbolic lookup of function names
-   * - list of all locks held in the system, printout of them
-   * - owner tracking
-   * - detects self-recursing locks and prints out all relevant info
-   * - detects multi-task circular deadlocks and prints out all affected
-   *   locks and tasks (and only those tasks)
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG_DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+				       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);
 
 Disadvantages
 -------------
 
-The stricter mutex API means you cannot use mutexes the same way you
-can use semaphores: e.g. they cannot be used from an interrupt context,
-nor can they be unlocked from a different context that which acquired
-it. [ I'm not aware of any other (e.g. performance) disadvantages from
-using mutexes at the moment, please let me know if you find any. ]
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.
 
-Implementation of mutexes
--------------------------
+When to use mutexes
+-------------------
 
-'struct mutex' is the new mutex type, defined in include/linux/mutex.h and
-implemented in kernel/locking/mutex.c. It is a counter-based mutex with a
-spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for
-"locked" and negative numbers (usually -1) for "locked, potential waiters
-queued".
-
-the APIs of 'struct mutex' have been streamlined:
-
- DEFINE_MUTEX(name);
-
- mutex_init(mutex);
-
- void mutex_lock(struct mutex *lock);
- int  mutex_lock_interruptible(struct mutex *lock);
- int  mutex_trylock(struct mutex *lock);
- void mutex_unlock(struct mutex *lock);
- int  mutex_is_locked(struct mutex *lock);
- void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
- int  mutex_lock_interruptible_nested(struct mutex *lock,
-                                      unsigned int subclass);
- int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c
index 8587020..1dbb4b8 100644
--- a/Documentation/vDSO/parse_vdso.c
+++ b/Documentation/vDSO/parse_vdso.c
@@ -1,6 +1,6 @@
 /*
  * parse_vdso.c: Linux reference vDSO parser
- * Written by Andrew Lutomirski, 2011.
+ * Written by Andrew Lutomirski, 2011-2014.
  *
  * This code is meant to be linked in to various programs that run on Linux.
  * As such, it is available with as few restrictions as possible.  This file
@@ -11,13 +11,14 @@
  * it starts a program.  It works equally well in statically and dynamically
  * linked binaries.
  *
- * This code is tested on x86_64.  In principle it should work on any 64-bit
+ * This code is tested on x86.  In principle it should work on any
  * architecture that has a vDSO.
  */
 
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
+#include <limits.h>
 #include <elf.h>
 
 /*
@@ -45,11 +46,18 @@
 
 
 /* And here's the code. */
-
-#ifndef __x86_64__
-# error Not yet ported to non-x86_64 architectures
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+#  define ELF_BITS 64
+# else
+#  define ELF_BITS 32
+# endif
 #endif
 
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
 static struct vdso_info
 {
 	bool valid;
@@ -59,14 +67,14 @@
 	uintptr_t load_offset;  /* load_addr - recorded vaddr */
 
 	/* Symbol table */
-	Elf64_Sym *symtab;
+	ELF(Sym) *symtab;
 	const char *symstrings;
-	Elf64_Word *bucket, *chain;
-	Elf64_Word nbucket, nchain;
+	ELF(Word) *bucket, *chain;
+	ELF(Word) nbucket, nchain;
 
 	/* Version table */
-	Elf64_Versym *versym;
-	Elf64_Verdef *verdef;
+	ELF(Versym) *versym;
+	ELF(Verdef) *verdef;
 } vdso_info;
 
 /* Straight from the ELF specification. */
@@ -92,9 +100,14 @@
 
 	vdso_info.load_addr = base;
 
-	Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
-	Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
-	Elf64_Dyn *dyn = 0;
+	ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+	if (hdr->e_ident[EI_CLASS] !=
+	    (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+		return;  /* Wrong ELF class -- check ELF_BITS */
+	}
+
+	ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+	ELF(Dyn) *dyn = 0;
 
 	/*
 	 * We need two things from the segment table: the load offset
@@ -108,7 +121,7 @@
 				+ (uintptr_t)pt[i].p_offset
 				- (uintptr_t)pt[i].p_vaddr;
 		} else if (pt[i].p_type == PT_DYNAMIC) {
-			dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
+			dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
 		}
 	}
 
@@ -118,7 +131,7 @@
 	/*
 	 * Fish out the useful bits of the dynamic table.
 	 */
-	Elf64_Word *hash = 0;
+	ELF(Word) *hash = 0;
 	vdso_info.symstrings = 0;
 	vdso_info.symtab = 0;
 	vdso_info.versym = 0;
@@ -131,22 +144,22 @@
 				 + vdso_info.load_offset);
 			break;
 		case DT_SYMTAB:
-			vdso_info.symtab = (Elf64_Sym *)
+			vdso_info.symtab = (ELF(Sym) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_HASH:
-			hash = (Elf64_Word *)
+			hash = (ELF(Word) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_VERSYM:
-			vdso_info.versym = (Elf64_Versym *)
+			vdso_info.versym = (ELF(Versym) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
 		case DT_VERDEF:
-			vdso_info.verdef = (Elf64_Verdef *)
+			vdso_info.verdef = (ELF(Verdef) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
@@ -168,8 +181,8 @@
 	vdso_info.valid = true;
 }
 
-static bool vdso_match_version(Elf64_Versym ver,
-			       const char *name, Elf64_Word hash)
+static bool vdso_match_version(ELF(Versym) ver,
+			       const char *name, ELF(Word) hash)
 {
 	/*
 	 * This is a helper function to check if the version indexed by
@@ -188,7 +201,7 @@
 
 	/* First step: find the version definition */
 	ver &= 0x7fff;  /* Apparently bit 15 means "hidden" */
-	Elf64_Verdef *def = vdso_info.verdef;
+	ELF(Verdef) *def = vdso_info.verdef;
 	while(true) {
 		if ((def->vd_flags & VER_FLG_BASE) == 0
 		    && (def->vd_ndx & 0x7fff) == ver)
@@ -197,11 +210,11 @@
 		if (def->vd_next == 0)
 			return false;  /* No definition. */
 
-		def = (Elf64_Verdef *)((char *)def + def->vd_next);
+		def = (ELF(Verdef) *)((char *)def + def->vd_next);
 	}
 
 	/* Now figure out whether it matches. */
-	Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
+	ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
 	return def->vd_hash == hash
 		&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
 }
@@ -213,10 +226,10 @@
 		return 0;
 
 	ver_hash = elf_hash(version);
-	Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+	ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
 
 	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
-		Elf64_Sym *sym = &vdso_info.symtab[chain];
+		ELF(Sym) *sym = &vdso_info.symtab[chain];
 
 		/* Check for a defined global or weak function w/ right name. */
 		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
@@ -243,7 +256,7 @@
 
 void vdso_init_from_auxv(void *auxv)
 {
-	Elf64_auxv_t *elf_auxv = auxv;
+	ELF(auxv_t) *elf_auxv = auxv;
 	for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
 	{
 		if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 0000000..d462402
--- /dev/null
+++ b/Documentation/vDSO/vdso_standalone_test_x86.c
@@ -0,0 +1,128 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ *      vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary.  On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a few libc functions... */
+int strcmp(const char *a, const char *b)
+{
+	/* This implementation is buggy: it never returns -1. */
+	while (*a || *b) {
+		if (*a != *b)
+			return 1;
+		if (*a == 0 || *b == 0)
+			return 1;
+		a++;
+		b++;
+	}
+
+	return 0;
+}
+
+/* ...and two syscalls.  This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+	long ret;
+#ifdef __x86_64__
+	asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+		      "D" (a0), "S" (a1), "d" (a2) :
+		      "cc", "memory", "rcx",
+		      "r8", "r9", "r10", "r11" );
+#else
+	asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+		      "b" (a0), "c" (a1), "d" (a2) :
+		      "cc", "memory" );
+#endif
+	return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+	return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+	x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, uint64_t n)
+{
+	while (n) {
+		*lastdig = (n % 10) + '0';
+		n /= 10;
+		lastdig--;
+	}
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+	/* Parse the stack */
+	long argc = (long)*stack;
+	stack += argc + 2;
+
+	/* Now we're pointing at the environment.  Skip it. */
+	while(*stack)
+		stack++;
+	stack++;
+
+	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
+	vdso_init_from_auxv((void *)stack);
+
+	/* Find gettimeofday. */
+	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+	if (!gtod)
+		linux_exit(1);
+
+	struct timeval tv;
+	long ret = gtod(&tv, 0);
+
+	if (ret == 0) {
+		char buf[] = "The time is                     .000000\n";
+		to_base10(buf + 31, tv.tv_sec);
+		to_base10(buf + 38, tv.tv_usec);
+		linux_write(1, buf, sizeof(buf) - 1);
+	} else {
+		linux_exit(ret);
+	}
+
+	linux_exit(0);
+}
+
+/*
+ * This is the real entry point.  It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+	".text\n"
+	".global _start\n"
+	".type _start,@function\n"
+	"_start:\n\t"
+#ifdef __x86_64__
+	"mov %rsp,%rdi\n\t"
+	"jmp c_main"
+#else
+	"push %esp\n\t"
+	"call c_main\n\t"
+	"int $3"
+#endif
+	);
diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c
index fff6334..8daeb7d 100644
--- a/Documentation/vDSO/vdso_test.c
+++ b/Documentation/vDSO/vdso_test.c
@@ -1,111 +1,52 @@
 /*
- * vdso_test.c: Sample code to test parse_vdso.c on x86_64
- * Copyright (c) 2011 Andy Lutomirski
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
  * Subject to the GNU General Public License, version 2
  *
- * You can amuse yourself by compiling with:
- * gcc -std=gnu99 -nostdlib
- *     -Os -fno-asynchronous-unwind-tables -flto
- *      vdso_test.c parse_vdso.c -o vdso_test
- * to generate a small binary with no dependencies at all.
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
  */
 
-#include <sys/syscall.h>
-#include <sys/time.h>
-#include <unistd.h>
 #include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
 
 extern void *vdso_sym(const char *version, const char *name);
 extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
 extern void vdso_init_from_auxv(void *auxv);
 
-/* We need a libc functions... */
-int strcmp(const char *a, const char *b)
+int main(int argc, char **argv)
 {
-	/* This implementation is buggy: it never returns -1. */
-	while (*a || *b) {
-		if (*a != *b)
-			return 1;
-		if (*a == 0 || *b == 0)
-			return 1;
-		a++;
-		b++;
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return 0;
 	}
 
-	return 0;
-}
-
-/* ...and two syscalls.  This is x86_64-specific. */
-static inline long linux_write(int fd, const void *data, size_t len)
-{
-
-	long ret;
-	asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write),
-		      "D" (fd), "S" (data), "d" (len) :
-		      "cc", "memory", "rcx",
-		      "r8", "r9", "r10", "r11" );
-	return ret;
-}
-
-static inline void linux_exit(int code)
-{
-	asm volatile ("syscall" : : "a" (__NR_exit), "D" (code));
-}
-
-void to_base10(char *lastdig, uint64_t n)
-{
-	while (n) {
-		*lastdig = (n % 10) + '0';
-		n /= 10;
-		lastdig--;
-	}
-}
-
-__attribute__((externally_visible)) void c_main(void **stack)
-{
-	/* Parse the stack */
-	long argc = (long)*stack;
-	stack += argc + 2;
-
-	/* Now we're pointing at the environment.  Skip it. */
-	while(*stack)
-		stack++;
-	stack++;
-
-	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
-	vdso_init_from_auxv((void *)stack);
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
 
 	/* Find gettimeofday. */
 	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
 	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
 
-	if (!gtod)
-		linux_exit(1);
+	if (!gtod) {
+		printf("Could not find __vdso_gettimeofday\n");
+		return 1;
+	}
 
 	struct timeval tv;
 	long ret = gtod(&tv, 0);
 
 	if (ret == 0) {
-		char buf[] = "The time is                     .000000\n";
-		to_base10(buf + 31, tv.tv_sec);
-		to_base10(buf + 38, tv.tv_usec);
-		linux_write(1, buf, sizeof(buf) - 1);
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
 	} else {
-		linux_exit(ret);
+		printf("__vdso_gettimeofday failed\n");
 	}
 
-	linux_exit(0);
+	return 0;
 }
-
-/*
- * This is the real entry point.  It passes the initial stack into
- * the C entry point.
- */
-asm (
-	".text\n"
-	".global _start\n"
-        ".type _start,@function\n"
-        "_start:\n\t"
-        "mov %rsp,%rdi\n\t"
-        "jmp c_main"
-	);
diff --git a/Makefile b/Makefile
index c761fb1..7680d7c 100644
--- a/Makefile
+++ b/Makefile
@@ -105,10 +105,6 @@
   KBUILD_OUTPUT := $(O)
 endif
 
-ifeq ("$(origin W)", "command line")
-  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
-endif
-
 # That's our default target when none is given on the command line
 PHONY := _all
 _all:
@@ -153,8 +149,18 @@
 _all: modules
 endif
 
-srctree		:= $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR))
-objtree		:= $(CURDIR)
+ifeq ($(KBUILD_SRC),)
+        # building in the source tree
+        srctree := .
+else
+        ifeq ($(KBUILD_SRC)/,$(dir $(CURDIR)))
+                # building in a subdirectory of the source tree
+                srctree := ..
+        else
+                srctree := $(KBUILD_SRC)
+        endif
+endif
+objtree		:= .
 src		:= $(srctree)
 obj		:= $(objtree)
 
@@ -166,7 +172,7 @@
 # SUBARCH tells the usermode build what the underlying arch is.  That is set
 # first, and if a usermode build is happening, the "ARCH=um" on the command
 # line overrides the setting of ARCH below.  If a native build is happening,
-# then ARCH is assigned, getting whatever value it gets normally, and 
+# then ARCH is assigned, getting whatever value it gets normally, and
 # SUBARCH is subsequently ignored.
 
 SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
@@ -259,18 +265,18 @@
 KBUILD_MODULES :=
 KBUILD_BUILTIN := 1
 
-#	If we have only "make modules", don't compile built-in objects.
-#	When we're building modules with modversions, we need to consider
-#	the built-in objects during the descend as well, in order to
-#	make sure the checksums are up to date before we record them.
+# If we have only "make modules", don't compile built-in objects.
+# When we're building modules with modversions, we need to consider
+# the built-in objects during the descend as well, in order to
+# make sure the checksums are up to date before we record them.
 
 ifeq ($(MAKECMDGOALS),modules)
   KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
 endif
 
-#	If we have "make <whatever> modules", compile modules
-#	in addition to whatever we do anyway.
-#	Just "make" or "make all" shall build modules as well
+# If we have "make <whatever> modules", compile modules
+# in addition to whatever we do anyway.
+# Just "make" or "make all" shall build modules as well
 
 ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
@@ -294,7 +300,7 @@
 #         cmd_cc_o_c       = $(CC) $(c_flags) -c -o $@ $<
 #
 # If $(quiet) is empty, the whole command will be printed.
-# If it is set to "quiet_", only the short version will be printed. 
+# If it is set to "quiet_", only the short version will be printed.
 # If it is set to "silent_", nothing will be printed at all, since
 # the variable $(silent_cmd_cc_o_c) doesn't exist.
 #
@@ -346,7 +352,6 @@
 include $(srctree)/scripts/Kbuild.include
 
 # Make variables (CC, etc...)
-
 AS		= $(CROSS_COMPILE)as
 LD		= $(CROSS_COMPILE)ld
 CC		= $(CROSS_COMPILE)gcc
@@ -395,8 +400,8 @@
 KBUILD_CFLAGS   := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		   -fno-strict-aliasing -fno-common \
 		   -Werror-implicit-function-declaration \
-		   -Wno-format-security \
-		   $(call cc-option,-fno-delete-null-pointer-checks,)
+		   -Wno-format-security
+
 KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
 KBUILD_AFLAGS   := -D__ASSEMBLY__
@@ -504,8 +509,16 @@
 # We're called with mixed targets (*config and build targets).
 # Handle them one by one.
 
-%:: FORCE
-	$(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@
+PHONY += $(MAKECMDGOALS) __build_one_by_one
+
+$(filter-out __build_one_by_one, $(MAKECMDGOALS)): __build_one_by_one
+	@:
+
+__build_one_by_one:
+	$(Q)set -e; \
+	for i in $(MAKECMDGOALS); do \
+		$(MAKE) -f $(srctree)/Makefile $$i; \
+	done
 
 else
 ifeq ($(config-targets),1)
@@ -520,11 +533,9 @@
 export KBUILD_DEFCONFIG KBUILD_KCONFIG
 
 config: scripts_basic outputmakefile FORCE
-	$(Q)mkdir -p include/linux include/config
 	$(Q)$(MAKE) $(build)=scripts/kconfig $@
 
 %config: scripts_basic outputmakefile FORCE
-	$(Q)mkdir -p include/linux include/config
 	$(Q)$(MAKE) $(build)=scripts/kconfig $@
 
 else
@@ -594,14 +605,16 @@
 # Defaults to vmlinux, but the arch makefile usually adds further targets
 all: vmlinux
 
+include $(srctree)/arch/$(SRCARCH)/Makefile
+
+KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
+
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS	+= -O2
 endif
 
-include $(srctree)/arch/$(SRCARCH)/Makefile
-
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
@@ -731,6 +744,8 @@
 	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
+include $(srctree)/scripts/Makefile.extrawarn
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 KBUILD_CPPFLAGS += $(KCPPFLAGS)
 KBUILD_AFLAGS += $(KAFLAGS)
@@ -775,10 +790,10 @@
 export MODLIB
 
 #
-#  INSTALL_MOD_STRIP, if defined, will cause modules to be
-#  stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
-#  the default option --strip-debug will be used.  Otherwise,
-#  INSTALL_MOD_STRIP value will be used as the options to the strip command.
+# INSTALL_MOD_STRIP, if defined, will cause modules to be
+# stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
+# the default option --strip-debug will be used.  Otherwise,
+# INSTALL_MOD_STRIP value will be used as the options to the strip command.
 
 ifdef INSTALL_MOD_STRIP
 ifeq ($(INSTALL_MOD_STRIP),1)
@@ -863,7 +878,7 @@
 endif
 	+$(call if_changed,link-vmlinux)
 
-# The actual objects are generated when descending, 
+# The actual objects are generated when descending,
 # make sure no implicit rule kicks in
 $(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
 
@@ -1021,11 +1036,11 @@
 
 all: modules
 
-#	Build modules
+# Build modules
 #
-#	A module can be listed more than once in obj-m resulting in
-#	duplicate lines in modules.order files.  Those are removed
-#	using awk while concatenating to the final file.
+# A module can be listed more than once in obj-m resulting in
+# duplicate lines in modules.order files.  Those are removed
+# using awk while concatenating to the final file.
 
 PHONY += modules
 modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin
@@ -1054,10 +1069,10 @@
 	@rm -rf $(MODLIB)/kernel
 	@rm -f $(MODLIB)/source
 	@mkdir -p $(MODLIB)/kernel
-	@ln -s $(srctree) $(MODLIB)/source
+	@ln -s `cd $(srctree) && /bin/pwd` $(MODLIB)/source
 	@if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
 		rm -f $(MODLIB)/build ; \
-		ln -s $(objtree) $(MODLIB)/build ; \
+		ln -s $(CURDIR) $(MODLIB)/build ; \
 	fi
 	@cp -f $(objtree)/modules.order $(MODLIB)/
 	@cp -f $(objtree)/modules.builtin $(MODLIB)/
@@ -1104,7 +1119,7 @@
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config usr/include include/generated          \
-                  arch/*/include/generated .tmp_objdiff
+		  arch/*/include/generated .tmp_objdiff
 MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \
 		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
 		  signing_key.priv signing_key.x509 x509.genkey		\
@@ -1478,7 +1493,7 @@
 	$(build)=$(build-dir) $(@:.ko=.o)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
 
-# FIXME Should go into a make.lib or something 
+# FIXME Should go into a make.lib or something
 # ===========================================================================
 
 quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN   $(wildcard $(rm-dirs)))
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index c767687..b03cfe4 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -26,7 +26,7 @@
 		clock-frequency = <0>;
 	};
 
-	atlclkin3_ck: atlclkin3_ck {
+	atl_clkin3_ck: atl_clkin3_ck {
 		#clock-cells = <0>;
 		compatible = "fixed-clock";
 		clock-frequency = <0>;
@@ -277,7 +277,7 @@
 
 	dpll_mpu_ck: dpll_mpu_ck {
 		#clock-cells = <0>;
-		compatible = "ti,omap4-dpll-clock";
+		compatible = "ti,omap5-mpu-dpll-clock";
 		clocks = <&sys_clkin1>, <&mpu_dpll_hs_clk_div>;
 		reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>;
 	};
@@ -730,7 +730,7 @@
 	mcasp1_ahclkr_mux: mcasp1_ahclkr_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <28>;
 		reg = <0x0550>;
 	};
@@ -738,7 +738,7 @@
 	mcasp1_ahclkx_mux: mcasp1_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x0550>;
 	};
@@ -1639,7 +1639,7 @@
 	mcasp2_ahclkr_mux: mcasp2_ahclkr_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <28>;
 		reg = <0x1860>;
 	};
@@ -1647,7 +1647,7 @@
 	mcasp2_ahclkx_mux: mcasp2_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1860>;
 	};
@@ -1663,7 +1663,7 @@
 	mcasp3_ahclkx_mux: mcasp3_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1868>;
 	};
@@ -1679,7 +1679,7 @@
 	mcasp4_ahclkx_mux: mcasp4_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1898>;
 	};
@@ -1695,7 +1695,7 @@
 	mcasp5_ahclkx_mux: mcasp5_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1878>;
 	};
@@ -1711,7 +1711,7 @@
 	mcasp6_ahclkx_mux: mcasp6_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1904>;
 	};
@@ -1727,7 +1727,7 @@
 	mcasp7_ahclkx_mux: mcasp7_ahclkx_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <24>;
 		reg = <0x1908>;
 	};
@@ -1743,7 +1743,7 @@
 	mcasp8_ahclk_mux: mcasp8_ahclk_mux {
 		#clock-cells = <0>;
 		compatible = "ti,mux-clock";
-		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
+		clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>;
 		ti,bit-shift = <22>;
 		reg = <0x1890>;
 	};
diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi
index aeb142c..e67a23b 100644
--- a/arch/arm/boot/dts/omap54xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi
@@ -335,7 +335,7 @@
 
 	dpll_mpu_ck: dpll_mpu_ck {
 		#clock-cells = <0>;
-		compatible = "ti,omap4-dpll-clock";
+		compatible = "ti,omap5-mpu-dpll-clock";
 		clocks = <&sys_clkin>, <&mpu_dpll_hs_clk_div>;
 		reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>;
 	};
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a6bc431..4238bcb 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -410,7 +410,7 @@
 	 */
 	hwc->config_base	    |= (unsigned long)mapping;
 
-	if (!hwc->sample_period) {
+	if (!is_sampling_event(event)) {
 		/*
 		 * For non-sampling runs, limit the sample_period to half
 		 * of the counter width. That way, the new counter value
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index a71ae15..af9e35e 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -126,8 +126,8 @@
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 	if (irqs < 1) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
+		printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
 	}
 
 	irq = platform_get_irq(pmu_device, 0);
@@ -191,6 +191,10 @@
 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu->reset)
 		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+
+	/* If no interrupts available, set the corresponding capability flag */
+	if (!platform_get_irq(cpu_pmu->plat_device, 0))
+		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 }
 
 /*
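
For reference, a minimal sketch of the core-side check that makes this
work: sampling events get refused when the PMU carries the new
capability flag. The exact placement inside the perf core is assumed
here, not shown by this diff:

	if (is_sampling_event(event) &&
	    (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT))
		return -EOPNOTSUPP;	/* counting-mode events still work */
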
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 3997c41..9d85318 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -26,30 +26,30 @@
 #include <asm/topology.h>
 
 /*
- * cpu power scale management
+ * cpu capacity scale management
  */
 
 /*
- * cpu power table
+ * cpu capacity table
  * This per cpu data structure describes the relative capacity of each core.
  * On a heterogeneous system, cores don't have the same computation capacity
- * and we reflect that difference in the cpu_power field so the scheduler can
- * take this difference into account during load balance. A per cpu structure
- * is preferred because each CPU updates its own cpu_power field during the
- * load balance except for idle cores. One idle core is selected to run the
- * rebalance_domains for all idle cores and the cpu_power can be updated
- * during this sequence.
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);
 
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return per_cpu(cpu_scale, cpu);
 }
 
-static void set_power_scale(unsigned int cpu, unsigned long power)
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
 {
-	per_cpu(cpu_scale, cpu) = power;
+	per_cpu(cpu_scale, cpu) = capacity;
 }
 
 #ifdef CONFIG_OF
@@ -62,11 +62,11 @@
  * Table of relative efficiency of each processor
  * The efficiency value must fit in 20 bits and the final
  * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
  * in order to return at most 1 when DIV_ROUND_CLOSEST
  * is used to compute the capacity of a CPU.
  * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
  */
 static const struct cpu_efficiency table_efficiency[] = {
 	{"arm,cortex-a15", 3891},
@@ -83,9 +83,9 @@
  * Iterate over all CPUs' descriptors in the DT and compute the
  * efficiency (as per table_efficiency). Also calculate a middle
  * efficiency as close as possible to (max{eff_i} - min{eff_i}) / 2
- * This is later used to scale the cpu_power field such that an
- * 'average' CPU is of middle power. Also see the comments near
- * table_efficiency[] and update_cpu_power().
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
  */
 static void __init parse_dt_topology(void)
 {
@@ -141,15 +141,15 @@
 	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
 	 * compute a middle_capacity factor that will ensure that the capacity
 	 * of an 'average' CPU of the system will be as close as possible to
-	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
 	 * constraint explained near table_efficiency[].
 	 */
 	if (4*max_capacity < (3*(max_capacity + min_capacity)))
 		middle_capacity = (min_capacity + max_capacity)
-				>> (SCHED_POWER_SHIFT+1);
+				>> (SCHED_CAPACITY_SHIFT+1);
 	else
 		middle_capacity = ((max_capacity / 3)
-				>> (SCHED_POWER_SHIFT-1)) + 1;
+				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
 
 }
 
@@ -158,20 +158,20 @@
  * boot. The update of all CPUs is in O(n^2) for a heterogeneous system but
  * the function returns directly for SMP systems.
  */
-static void update_cpu_power(unsigned int cpu)
+static void update_cpu_capacity(unsigned int cpu)
 {
 	if (!cpu_capacity(cpu))
 		return;
 
-	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
-		cpu, arch_scale_freq_power(NULL, cpu));
+	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+		cpu, arch_scale_freq_capacity(NULL, cpu));
 }
 
 #else
 static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
+static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
  /*
@@ -267,7 +267,7 @@
 
 	update_siblings_masks(cpuid);
 
-	update_cpu_power(cpuid);
+	update_cpu_capacity(cpuid);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
@@ -297,7 +297,7 @@
 {
 	unsigned int cpu;
 
-	/* init core mask and power*/
+	/* init core mask and capacity */
 	for_each_possible_cpu(cpu) {
 		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
 
@@ -307,7 +307,7 @@
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
 
-		set_power_scale(cpu, SCHED_POWER_SCALE);
+		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
 	}
 	smp_wmb();
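
A standalone worked example of the middle_capacity computation above,
with hypothetical raw capacities of 2,000,000 and 3,800,000 and
SCHED_CAPACITY_SHIFT == 10 (so SCHED_CAPACITY_SCALE == 1024):

	#include <stdio.h>

	int main(void)
	{
		unsigned long min_capacity = 2000000;
		unsigned long max_capacity = 3800000;
		unsigned long middle_capacity;

		if (4 * max_capacity < 3 * (max_capacity + min_capacity))
			middle_capacity = (min_capacity + max_capacity)
					>> (10 + 1);
		else
			middle_capacity = ((max_capacity / 3)
					>> (10 - 1)) + 1;

		/* prints 2832 706 1341: the per-cpu scales straddle
		 * SCHED_CAPACITY_SCALE (1024), so the 'average' CPU
		 * lands close to the default capacity. */
		printf("%lu %lu %lu\n", middle_capacity,
		       min_capacity / middle_capacity,
		       max_capacity / middle_capacity);
		return 0;
	}
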
 
diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
index b935ed2..85e0b0c0 100644
--- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
+++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
@@ -208,3 +208,56 @@
 		clk_put(c);
 	}
 }
+
+#ifdef CONFIG_OF
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+
+static const struct clk_ops virt_prcm_set_ops = {
+	.recalc_rate	= &omap2_table_mpu_recalc,
+	.set_rate	= &omap2_select_table_rate,
+	.round_rate	= &omap2_round_to_table_rate,
+};
+
+/**
+ * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock
+ *
+ * Does a manual init for the virtual prcm DVFS clock for OMAP2. This
+ * function is called only from omap2 DT clock init, as the virtual
+ * node is not modelled in the DT clock data.
+ */
+void omap2xxx_clkt_vps_init(void)
+{
+	struct clk_init_data init = { NULL };
+	struct clk_hw_omap *hw = NULL;
+	struct clk *clk;
+	const char *parent_name = "mpu_ck";
+	struct clk_lookup *lookup = NULL;
+
+	omap2xxx_clkt_vps_late_init();
+	omap2xxx_clkt_vps_check_bootloader_rates();
+
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	lookup = kzalloc(sizeof(*lookup), GFP_KERNEL);
+	if (!hw || !lookup)
+		goto cleanup;
+	init.name = "virt_prcm_set";
+	init.ops = &virt_prcm_set_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	hw->hw.init = &init;
+
+	clk = clk_register(NULL, &hw->hw);
+
+	lookup->dev_id = NULL;
+	lookup->con_id = "cpufreq_ck";
+	lookup->clk = clk;
+
+	clkdev_add(lookup);
+	return;
+cleanup:
+	kfree(hw);
+	kfree(lookup);
+}
+#endif
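
A sketch of how a consumer would then pick up this clock through the
clkdev lookup registered above; error handling is elided and the target
rate is illustrative:

	struct clk *clk = clk_get(NULL, "cpufreq_ck");

	if (!IS_ERR(clk)) {
		clk_set_rate(clk, clk_round_rate(clk, 600000000));
		clk_put(clk);
	}
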
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index bda767a..12f54d4 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -178,17 +178,6 @@
 	const struct clksel_rate *rates;
 };
 
-struct clk_hw_omap_ops {
-	void			(*find_idlest)(struct clk_hw_omap *oclk,
-					void __iomem **idlest_reg,
-					u8 *idlest_bit, u8 *idlest_val);
-	void			(*find_companion)(struct clk_hw_omap *oclk,
-					void __iomem **other_reg,
-					u8 *other_bit);
-	void			(*allow_idle)(struct clk_hw_omap *oclk);
-	void			(*deny_idle)(struct clk_hw_omap *oclk);
-};
-
 unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw,
 					unsigned long parent_rate);
 
@@ -279,8 +268,6 @@
 extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
 extern const struct clk_hw_omap_ops clkhwops_apll54;
 extern const struct clk_hw_omap_ops clkhwops_apll96;
-extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
-extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 
 /* clksel_rate blocks shared between OMAP44xx and AM33xx */
 extern const struct clksel_rate div_1_0_rates[];
diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h
index 539dc08..45f41a4 100644
--- a/arch/arm/mach-omap2/clock2xxx.h
+++ b/arch/arm/mach-omap2/clock2xxx.h
@@ -21,10 +21,6 @@
 				      unsigned long parent_rate);
 unsigned long omap2_osc_clk_recalc(struct clk_hw *clk,
 				   unsigned long parent_rate);
-unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
-				    unsigned long parent_rate);
-int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
-			     unsigned long parent_rate);
 void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
 unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw,
 				      unsigned long parent_rate);
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index fcd8036..6d7ba37 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -319,6 +319,15 @@
 
 	/* Set DPLL multiplier, divider */
 	v = omap2_clk_readl(clk, dd->mult_div1_reg);
+
+	/* Handle Duty Cycle Correction */
+	if (dd->dcc_mask) {
+		if (dd->last_rounded_rate >= dd->dcc_rate)
+			v |= dd->dcc_mask; /* Enable DCC */
+		else
+			v &= ~dd->dcc_mask; /* Disable DCC */
+	}
+
 	v &= ~(dd->mult_mask | dd->div1_mask);
 	v |= dd->last_rounded_m << __ffs(dd->mult_mask);
 	v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig
index 1759fad..e66ba31 100644
--- a/arch/blackfin/configs/BF526-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig
@@ -53,7 +53,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -63,6 +62,7 @@
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set
diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
index 3577296..0207c58 100644
--- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig
@@ -58,7 +58,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
@@ -66,6 +65,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set
diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig
index 2e73a5d..99c131b 100644
--- a/arch/blackfin/configs/BF527-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig
@@ -57,7 +57,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
@@ -65,6 +64,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=m
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set
diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index f0a2ddf..38cb17d 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig
@@ -64,7 +64,6 @@
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -75,6 +74,7 @@
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_BF5XX=y
 # CONFIG_MTD_NAND_BF5XX_HWECC is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig
index 4ca39ab..a7e9bfd 100644
--- a/arch/blackfin/configs/BF609-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig
@@ -57,7 +57,6 @@
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -65,6 +64,7 @@
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig
index 3853c47..f4a9200 100644
--- a/arch/blackfin/configs/BlackStamp_defconfig
+++ b/arch/blackfin/configs/BlackStamp_defconfig
@@ -45,7 +45,6 @@
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
@@ -53,7 +52,7 @@
 CONFIG_MTD_ROM=m
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
-# CONFIG_M25PXX_USE_FAST_READ is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig
index f754e49..0ff97d8 100644
--- a/arch/blackfin/configs/H8606_defconfig
+++ b/arch/blackfin/configs/H8606_defconfig
@@ -36,13 +36,12 @@
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_M25P80=y
-# CONFIG_M25PXX_USE_FAST_READ is not set
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_AT25=y
diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index 8d1e4c2..40e9c2b 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h
@@ -316,6 +316,8 @@
 }
 static inline void enable_dma(unsigned int channel)
 {
+	dma_ch[channel].regs->curr_x_count = 0;
+	dma_ch[channel].regs->curr_y_count = 0;
 	dma_ch[channel].regs->cfg |= DMAEN;
 }
 int set_dma_callback(unsigned int channel, irq_handler_t callback, void *data);
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index d098929..6f4bac9 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -17,6 +17,7 @@
 #if IS_ENABLED(CONFIG_USB_ISP1362_HCD)
 #include <linux/usb/isp1362.h>
 #endif
+#include <linux/gpio.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
 #include <asm/dma.h>
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 21c9f30..790352f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -235,11 +235,6 @@
 	  Select this to enable early debugging for Nintendo GameCube/Wii
 	  consoles via an external USB Gecko adapter.
 
-config PPC_EARLY_DEBUG_WSP
-	bool "Early debugging via WSP's internal UART"
-	depends on PPC_WSP
-	select PPC_UDBG_16550
-
 config PPC_EARLY_DEBUG_PS3GELIC
 	bool "Early debugging through the PS3 Ethernet port"
 	depends on PPC_PS3
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 426dce7..ccc25ed 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -333,8 +333,8 @@
 $(obj)/zImage.initrd.%: vmlinux $(wrapperbits)
 	$(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz)
 
-$(obj)/zImage.%: vmlinux $(wrapperbits)
-	$(call if_changed,wrap,$*)
+$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits)
+	$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
 
 # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
 $(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
deleted file mode 100644
index 4f35fc4..0000000
--- a/arch/powerpc/configs/chroma_defconfig
+++ /dev/null
@@ -1,307 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
-CONFIG_SMP=y
-CONFIG_NR_CPUS=256
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=19
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
-CONFIG_NAMESPACES=y
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_INITRAMFS_COMPRESSION_GZIP=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_SCOM_DEBUGFS=y
-CONFIG_PPC_A2_DD2=y
-CONFIG_KVM_GUEST=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_100=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_MISC=y
-CONFIG_NUMA=y
-# CONFIG_MIGRATION is not set
-CONFIG_PPC_64K_PAGES=y
-CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
-# CONFIG_SECCOMP is not set
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_MSI=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_KEY=m
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_IPV6=y
-CONFIG_IPV6_PRIVACY=y
-CONFIG_IPV6_ROUTER_PREF=y
-CONFIG_IPV6_ROUTE_INFO=y
-CONFIG_IPV6_OPTIMISTIC_DAD=y
-CONFIG_INET6_AH=y
-CONFIG_INET6_ESP=y
-CONFIG_INET6_IPCOMP=y
-CONFIG_IPV6_MIP6=y
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y
-CONFIG_IPV6_TUNNEL=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_PIMSM_V2=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NET_TCPPROBE=y
-# CONFIG_WIRELESS is not set
-CONFIG_NET_9P=y
-CONFIG_NET_9P_DEBUG=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_LE_BYTE_SWAP=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_CDROM_PKTCDVD=y
-CONFIG_MISC_DEVICES=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SCSI_SAS_ATTRS=m
-CONFIG_SCSI_SRP_ATTRS=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_SIL24=y
-CONFIG_SATA_MV=y
-CONFIG_SATA_SIL=y
-CONFIG_PATA_CMD64X=y
-CONFIG_PATA_MARVELL=y
-CONFIG_PATA_SIL680=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_SNAPSHOT=y
-CONFIG_DM_MIRROR=y
-CONFIG_DM_ZERO=y
-CONFIG_DM_UEVENT=y
-CONFIG_NETDEVICES=y
-CONFIG_TUN=y
-CONFIG_E1000E=y
-CONFIG_TIGON3=y
-# CONFIG_WLAN is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-# CONFIG_HWMON is not set
-# CONFIG_VGA_ARB is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1511=y
-CONFIG_RTC_DRV_DS1553=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=y
-# CONFIG_DNOTIFY is not set
-CONFIG_FUSE_FS=y
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_CONFIGFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_V4_1=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=y
-CONFIG_CIFS_WEAK_PW_HASH=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
-CONFIG_PRINTK_TIME=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_STRIP_ASM_SYMS=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_PPC_EMULATED_STATS=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index f42e9ba..7c8608b 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -489,7 +489,6 @@
 #define FCC_GFMR_TCI		((uint)0x20000000)
 #define FCC_GFMR_TRX		((uint)0x10000000)
 #define FCC_GFMR_TTX		((uint)0x08000000)
-#define FCC_GFMR_TTX		((uint)0x08000000)
 #define FCC_GFMR_CDP		((uint)0x04000000)
 #define FCC_GFMR_CTSP		((uint)0x02000000)
 #define FCC_GFMR_CDS		((uint)0x01000000)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index b76f58c..fab7743 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -254,6 +254,7 @@
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
+const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 void *eeh_dev_init(struct device_node *dn, void *data);
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 89d5670..1e551a2 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -33,7 +33,7 @@
 
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
-void eeh_remove_event(struct eeh_pe *pe);
+void eeh_remove_event(struct eeh_pe *pe, bool force);
 void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 901dac6..d0918e0 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -223,10 +223,6 @@
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
-#ifdef CONFIG_PPC_ICSWX
-	struct spinlock *cop_lockp;	/* guard cop related stuff */
-	unsigned long acop;		/* mask of enabled coprocessor types */
-#endif /* CONFIG_PPC_ICSWX */
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;   /* SLB page size encodings */
 	u64 high_slices_psize;  /* 4 bits per slice for now */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index cb15cbb..4600188 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -599,9 +599,9 @@
 };
 
 struct OpalIoPhbErrorCommon {
-	uint32_t version;
-	uint32_t ioType;
-	uint32_t len;
+	__be32 version;
+	__be32 ioType;
+	__be32 len;
 };
 
 struct OpalIoP7IOCPhbErrorData {
@@ -666,64 +666,64 @@
 struct OpalIoPhb3ErrorData {
 	struct OpalIoPhbErrorCommon common;
 
-	uint32_t brdgCtl;
+	__be32 brdgCtl;
 
 	/* PHB3 UTL regs */
-	uint32_t portStatusReg;
-	uint32_t rootCmplxStatus;
-	uint32_t busAgentStatus;
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
 
 	/* PHB3 cfg regs */
-	uint32_t deviceStatus;
-	uint32_t slotStatus;
-	uint32_t linkStatus;
-	uint32_t devCmdStatus;
-	uint32_t devSecStatus;
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
 
 	/* cfg AER regs */
-	uint32_t rootErrorStatus;
-	uint32_t uncorrErrorStatus;
-	uint32_t corrErrorStatus;
-	uint32_t tlpHdr1;
-	uint32_t tlpHdr2;
-	uint32_t tlpHdr3;
-	uint32_t tlpHdr4;
-	uint32_t sourceId;
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
 
-	uint32_t rsv3;
+	__be32 rsv3;
 
 	/* Record data about the call to allocate a buffer */
-	uint64_t errorClass;
-	uint64_t correlator;
+	__be64 errorClass;
+	__be64 correlator;
 
-	uint64_t nFir;			/* 000 */
-	uint64_t nFirMask;		/* 003 */
-	uint64_t nFirWOF;		/* 008 */
+	__be64 nFir;			/* 000 */
+	__be64 nFirMask;		/* 003 */
+	__be64 nFirWOF;		/* 008 */
 
 	/* PHB3 MMIO Error Regs */
-	uint64_t phbPlssr;		/* 120 */
-	uint64_t phbCsr;		/* 110 */
-	uint64_t lemFir;		/* C00 */
-	uint64_t lemErrorMask;		/* C18 */
-	uint64_t lemWOF;		/* C40 */
-	uint64_t phbErrorStatus;	/* C80 */
-	uint64_t phbFirstErrorStatus;	/* C88 */
-	uint64_t phbErrorLog0;		/* CC0 */
-	uint64_t phbErrorLog1;		/* CC8 */
-	uint64_t mmioErrorStatus;	/* D00 */
-	uint64_t mmioFirstErrorStatus;	/* D08 */
-	uint64_t mmioErrorLog0;		/* D40 */
-	uint64_t mmioErrorLog1;		/* D48 */
-	uint64_t dma0ErrorStatus;	/* D80 */
-	uint64_t dma0FirstErrorStatus;	/* D88 */
-	uint64_t dma0ErrorLog0;		/* DC0 */
-	uint64_t dma0ErrorLog1;		/* DC8 */
-	uint64_t dma1ErrorStatus;	/* E00 */
-	uint64_t dma1FirstErrorStatus;	/* E08 */
-	uint64_t dma1ErrorLog0;		/* E40 */
-	uint64_t dma1ErrorLog1;		/* E48 */
-	uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS];
-	uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 phbPlssr;		/* 120 */
+	__be64 phbCsr;		/* 110 */
+	__be64 lemFir;		/* C00 */
+	__be64 lemErrorMask;		/* C18 */
+	__be64 lemWOF;		/* C40 */
+	__be64 phbErrorStatus;	/* C80 */
+	__be64 phbFirstErrorStatus;	/* C88 */
+	__be64 phbErrorLog0;		/* CC0 */
+	__be64 phbErrorLog1;		/* CC8 */
+	__be64 mmioErrorStatus;	/* D00 */
+	__be64 mmioFirstErrorStatus;	/* D08 */
+	__be64 mmioErrorLog0;		/* D40 */
+	__be64 mmioErrorLog1;		/* D48 */
+	__be64 dma0ErrorStatus;	/* D80 */
+	__be64 dma0FirstErrorStatus;	/* D88 */
+	__be64 dma0ErrorLog0;		/* DC0 */
+	__be64 dma0ErrorLog1;		/* DC8 */
+	__be64 dma1ErrorStatus;	/* E00 */
+	__be64 dma1FirstErrorStatus;	/* E08 */
+	__be64 dma1ErrorLog0;		/* E40 */
+	__be64 dma1ErrorLog1;		/* E48 */
+	__be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
 };
 
 enum {
@@ -851,8 +851,8 @@
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
 int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
-int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
-			    uint16_t *pci_error_type, uint16_t *severity);
+int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
+			    __be16 *pci_error_type, __be16 *severity);
 int64_t opal_pci_poll(uint64_t phb_id);
 int64_t opal_return_cpu(void);
 int64_t opal_reinit_cpus(uint64_t flags);
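
With the fields now declared __be32/__be64, consumers have to byte-swap
explicitly on little-endian hosts. A sketch (the function name is
illustrative):

	static void phb3_dump_regs(struct OpalIoPhb3ErrorData *data)
	{
		pr_info("brdgCtl: %08x\n", be32_to_cpu(data->brdgCtl));
		pr_info("lemFir:  %016llx\n",
			(unsigned long long)be64_to_cpu(data->lemFir));
	}
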
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
index 3d52a11..3ba9c6f 100644
--- a/arch/powerpc/include/asm/reg_a2.h
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -110,15 +110,6 @@
 #define TLB1_UR			ASM_CONST(0x0000000000000002)
 #define TLB1_SR			ASM_CONST(0x0000000000000001)
 
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-#define WSP_UART_PHYS	0xffc000c000
-/* This needs to be careful chosen to hit a !0 congruence class
- * in the TLB since we bolt it in way 3, which is already occupied
- * by our linear mapping primary bolted entry in CC 0.
- */
-#define WSP_UART_VIRT	0xf000000000001000
-#endif
-
 /* A2 erativax attributes definitions */
 #define ERATIVAX_RS_IS_ALL		0x000
 #define ERATIVAX_RS_IS_TID		0x040
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 0e83e7d..58abeda 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -16,13 +16,15 @@
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
 #ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_tar(struct thread_struct *prev)
+static inline void save_early_sprs(struct thread_struct *prev)
 {
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		prev->tar = mfspr(SPRN_TAR);
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		prev->dscr = mfspr(SPRN_DSCR);
 }
 #else
-static inline void save_tar(struct thread_struct *prev) {}
+static inline void save_early_sprs(struct thread_struct *prev) {}
 #endif
 
 extern void enable_kernel_fp(void);
@@ -84,6 +86,8 @@
 {
 #ifdef CONFIG_PPC_BOOK3S_64
     /* EBB perf events are not inherited, so clear all EBB state. */
+    t->thread.ebbrr = 0;
+    t->thread.ebbhr = 0;
     t->thread.bescr = 0;
     t->thread.mmcr2 = 0;
     t->thread.mmcr0 = 0;
diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
deleted file mode 100644
index c7dc830..0000000
--- a/arch/powerpc/include/asm/wsp.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- *  Copyright 2011 Michael Ellerman, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-#ifndef __ASM_POWERPC_WSP_H
-#define __ASM_POWERPC_WSP_H
-
-extern int wsp_get_chip_id(struct device_node *dn);
-
-#endif /* __ASM_POWERPC_WSP_H */
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 5b76579..de2c0e4 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -41,5 +41,6 @@
 #define PPC_FEATURE2_EBB		0x10000000
 #define PPC_FEATURE2_ISEL		0x08000000
 #define PPC_FEATURE2_TAR		0x04000000
+#define PPC_FEATURE2_VEC_CRYPTO		0x02000000
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
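
Userspace can test the new bit through the ELF auxiliary vector; a
minimal sketch using glibc's getauxval() (the constant is repeated only
to keep the example self-contained):

	#include <sys/auxv.h>

	#define PPC_FEATURE2_VEC_CRYPTO	0x02000000

	static int have_vec_crypto(void)
	{
		return (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) != 0;
	}
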
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fab19ec..670c312 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -43,7 +43,6 @@
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
-obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
deleted file mode 100644
index 61f079e..0000000
--- a/arch/powerpc/kernel/cpu_setup_a2.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- *  A2 specific assembly support code
- *
- *  Copyright 2009 Ben Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/ppc_asm.h>
-#include <asm/ppc-opcode.h>
-#include <asm/processor.h>
-#include <asm/reg_a2.h>
-#include <asm/reg.h>
-#include <asm/thread_info.h>
-
-/*
- * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
- * This also prevents external LPID accesses but that isn't a problem when not a
- * guest. Under PV, this setting will be ignored and MMUCR will return the right
- * number of PID bits we can use.
- */
-#define MMUCR1_EXTEND_PID \
-	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
-	 MMUCR1_DTTID | MMUCR1_DCCD)
-
-/*
- * Use extended PIDs if enabled.
- * Don't clear the ERATs on context sync events and enable I & D LRU.
- * Enable ERAT back invalidate when tlbwe overwrites an entry.
- */
-#define INITIAL_MMUCR1 \
-	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
-	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
-
-_GLOBAL(__setup_cpu_a2)
-	/* Some of these are actually thread local and some are
-	 * core local but doing it always won't hurt
-	 */
-
-#ifdef CONFIG_PPC_ICSWX
-	/* Make sure ACOP starts out as zero */
-	li	r3,0
-	mtspr   SPRN_ACOP,r3
-
-	/* Skip the following if we are in Guest mode */
-	mfmsr	r3
-	andis.	r0,r3,MSR_GS@h
-	bne	_icswx_skip_guest
-
-	/* Enable icswx instruction */
-	mfspr   r3,SPRN_A2_CCR2
-	ori     r3,r3,A2_CCR2_ENABLE_ICSWX
-	mtspr   SPRN_A2_CCR2,r3
-
-	/* Unmask all CTs in HACOP */
-	li      r3,-1
-	mtspr   SPRN_HACOP,r3
-_icswx_skip_guest:
-#endif /* CONFIG_PPC_ICSWX */
-
-	/* Enable doorbell */
-	mfspr   r3,SPRN_A2_CCR2
-	oris     r3,r3,A2_CCR2_ENABLE_PC@h
-	mtspr   SPRN_A2_CCR2,r3
-	isync
-
-	/* Setup CCR0 to disable power saving for now as it's busted
-	 * in the current implementations. Setup CCR1 to wake on
-	 * interrupts normally (we write the default value but who
-	 * knows what FW may have clobbered...)
-	 */
-	li	r3,0
-	mtspr	SPRN_A2_CCR0, r3
-	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
-	mtspr	SPRN_A2_CCR1, r3
-
-	/* Initialise MMUCR1 */
-	lis	r3,INITIAL_MMUCR1@h
-	ori	r3,r3,INITIAL_MMUCR1@l
-	mtspr	SPRN_MMUCR1,r3
-
-	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
-	LOAD_REG_IMMEDIATE(r3, 0x000a7531)
-	mtspr	SPRN_MMUCR2,r3
-
-	/* Set MMUCR3 to write all thids bit to the TLB */
-	LOAD_REG_IMMEDIATE(r3, 0x0000000f)
-	mtspr	SPRN_MMUCR3,r3
-
-	/* Don't do ERAT stuff if running guest mode */
-	mfmsr	r3
-	andis.	r0,r3,MSR_GS@h
-	bne	1f
-
-	/* Now set the I-ERAT watermark to 15 */
-	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
-	mtspr	SPRN_MMUCR0, r4
-	li	r4,A2_IERAT_SIZE-1
-	PPC_ERATWE(R4,R4,3)
-
-	/* Now set the D-ERAT watermark to 31 */
-	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
-	mtspr	SPRN_MMUCR0, r4
-	li	r4,A2_DERAT_SIZE-1
-	PPC_ERATWE(R4,R4,3)
-
-	/* And invalidate the beast just in case. That won't get rid of
-	 * a bolted entry though it will be in LRU and so will go away eventually
-	 * but let's not bother for now
-	 */
-	PPC_ERATILX(0,0,R0)
-1:
-	blr
-
-_GLOBAL(__restore_cpu_a2)
-	b	__setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 1557e7c..4673353 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -56,6 +56,7 @@
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power8
@@ -74,6 +75,7 @@
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr   r3,SPRN_LPCR
+	ori	r3, r3, LPCR_PECEDH
 	bl	__init_LPCR
 	bl	__init_HFSCR
 	bl	__init_tlb_power8
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c1faade..965291b 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -109,7 +109,8 @@
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
 #define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
 				 PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
-				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)
 #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -2148,44 +2149,6 @@
 	}
 #endif /* CONFIG_PPC32 */
 #endif /* CONFIG_E500 */
-
-#ifdef CONFIG_PPC_A2
-	{	/* Standard A2 (>= DD2) + FPU core */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00480000,
-		.cpu_name		= "A2 (>= DD2)",
-		.cpu_features		= CPU_FTRS_A2,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTRS_A2,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 0,
-		.cpu_setup		= __setup_cpu_a2,
-		.cpu_restore		= __restore_cpu_a2,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppca2",
-	},
-	{	/* This is a default entry to get going, to be replaced by
-		 * a real one at some stage
-		 */
-#define CPU_FTRS_BASE_BOOK3E	(CPU_FTR_USE_TB | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
-	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "Book3E",
-		.cpu_features		= CPU_FTRS_BASE_BOOK3E,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
-					  MMU_FTR_USE_TLBIVAX_BCAST |
-					  MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 0,
-		.machine_check		= machine_check_generic,
-		.platform		= "power6",
-	},
-#endif /* CONFIG_PPC_A2 */
 };
 
 static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 7051ea3..86e2570 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -330,8 +330,8 @@
 	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 	eeh_serialize_unlock(flags);
 
-	pr_err("EEH: PHB#%x failure detected\n",
-		phb_pe->phb->global_number);
+	pr_err("EEH: PHB#%x failure detected, location: %s\n",
+		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
 	dump_stack();
 	eeh_send_failure_event(phb_pe);
 
@@ -358,10 +358,11 @@
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
 	int ret;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	unsigned long flags;
 	struct device_node *dn;
 	struct pci_dev *dev;
-	struct eeh_pe *pe;
+	struct eeh_pe *pe, *parent_pe, *phb_pe;
 	int rc = 0;
 	const char *location;
 
@@ -439,14 +440,34 @@
 	 */
 	if ((ret < 0) ||
 	    (ret == EEH_STATE_NOT_SUPPORT) ||
-	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+	    ((ret & active_flags) == active_flags)) {
 		eeh_stats.false_positives++;
 		pe->false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
+	/*
+	 * It is a corner case that the parent PE may have been
+	 * put into the frozen state as well. We should take
+	 * care of that first.
+	 */
+	parent_pe = pe->parent;
+	while (parent_pe) {
+		/* Hit the ceiling ? */
+		if (parent_pe->type & EEH_PE_PHB)
+			break;
+
+		/* Frozen parent PE ? */
+		ret = eeh_ops->get_state(parent_pe, NULL);
+		if (ret > 0 &&
+		    (ret & active_flags) != active_flags)
+			pe = parent_pe;
+
+		/* Next parent level */
+		parent_pe = parent_pe->parent;
+	}
+
 	eeh_stats.slot_resets++;
 
 	/* Avoid repeated reports of this failure, including problems
@@ -460,8 +481,11 @@
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out.
 	 */
-	pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
-		pe->addr, pe->phb->global_number);
+	phb_pe = eeh_phb_pe_get(pe->phb);
+	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+	       pe->phb->global_number, pe->addr);
+	pr_err("EEH: PE location: %s, PHB location: %s\n",
+	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
 	dump_stack();
 
 	eeh_send_failure_event(pe);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7100a5b..420da61 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -447,8 +447,9 @@
  * PE reset (for 3 times), we try to clear the frozen state
  * 3 times as well.
  */
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 {
+	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int i, rc;
 
 	for (i = 0; i < 3; i++) {
@@ -461,13 +462,24 @@
 	}
 
 	/* The PE has been isolated, clear it */
-	if (rc)
+	if (rc) {
 		pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
 			__func__, pe->phb->global_number, pe->addr, rc);
-	else
+		return (void *)pe;
+	}
+
+	return NULL;
+}
+
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+	void *rc;
+
+	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+	if (!rc)
 		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 
-	return rc;
+	return rc ? -EIO : 0;
 }
 
 /**
@@ -758,7 +770,7 @@
 			eeh_serialize_lock(&flags);
 
 			/* Purge all events */
-			eeh_remove_event(NULL);
+			eeh_remove_event(NULL, true);
 
 			list_for_each_entry(hose, &hose_list, list_node) {
 				phb_pe = eeh_phb_pe_get(hose);
@@ -777,7 +789,7 @@
 			eeh_serialize_lock(&flags);
 
 			/* Purge all events of the PHB */
-			eeh_remove_event(pe);
+			eeh_remove_event(pe, true);
 
 			if (rc == EEH_NEXT_ERR_DEAD_PHB)
 				eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 72d748b..4eefb6e 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -152,24 +152,33 @@
 /**
  * eeh_remove_event - Remove EEH event from the queue
  * @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
  *
  * On the PowerNV platform, we might see subsequent events
  * that are part of an earlier one. In that case, those later
  * events are complete duplicates and unnecessary, thus they
  * should be removed.
  */
-void eeh_remove_event(struct eeh_pe *pe)
+void eeh_remove_event(struct eeh_pe *pe, bool force)
 {
 	unsigned long flags;
 	struct eeh_event *event, *tmp;
 
+	/*
+	 * A NULL PE passed in means either that the IOC is dead,
+	 * or that the caller is sure it can report all existing
+	 * errors itself.
+	 *
+	 * Unless "force" is set, events bound to a PE that has
+	 * already been isolated are kept, so that no event is
+	 * lost.
+	 */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
 	list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
-		/*
-		 * If we don't have valid PE passed in, that means
-		 * we already have event corresponding to dead IOC
-		 * and all events should be purged.
-		 */
+		if (!force && event->pe &&
+		    (event->pe->state & EEH_PE_ISOLATED))
+			continue;
+
 		if (!pe) {
 			list_del(&event->list);
 			kfree(event);
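
The two resulting call patterns, sketched; the non-forced call is
illustrative and not part of this diff:

	eeh_remove_event(NULL, true);	/* dead IOC: purge every queued event */
	eeh_remove_event(pe, false);	/* drop duplicates, but keep events
					 * whose PE is already isolated */
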
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 995c2a2..fbd01eb 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -792,6 +792,66 @@
 }
 
 /**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the location code of the given PE. If the primary bus of
+ * the PE is the root bus, the location code is taken from the PHB's
+ * device tree node or from the root port. Otherwise, the device tree
+ * node of the upstream bridge of the primary PE bus is checked.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+	struct pci_controller *hose;
+	struct pci_bus *bus = eeh_pe_bus_get(pe);
+	struct pci_dev *pdev;
+	struct device_node *dn;
+	const char *loc;
+
+	if (!bus)
+		return "N/A";
+
+	/* PHB PE or root PE ? */
+	if (pci_is_root_bus(bus)) {
+		hose = pci_bus_to_host(bus);
+		loc = of_get_property(hose->dn,
+				"ibm,loc-code", NULL);
+		if (loc)
+			return loc;
+		loc = of_get_property(hose->dn,
+				"ibm,io-base-loc-code", NULL);
+		if (loc)
+			return loc;
+
+		pdev = pci_get_slot(bus, 0x0);
+	} else {
+		pdev = bus->self;
+	}
+
+	if (!pdev) {
+		loc = "N/A";
+		goto out;
+	}
+
+	dn = pci_device_to_OF_node(pdev);
+	if (!dn) {
+		loc = "N/A";
+		goto out;
+	}
+
+	loc = of_get_property(dn, "ibm,loc-code", NULL);
+	if (!loc)
+		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
+	if (!loc)
+		loc = "N/A";
+
+out:
+	if (pci_is_root_bus(bus) && pdev)
+		pci_dev_put(pdev);
+	return loc;
+}
+
+/**
  * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
  * @pe: EEH PE
  *
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 911d453..6528c5e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -428,12 +428,6 @@
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
-	mfspr	r25,SPRN_DSCR
-	std	r25,THREAD_DSCR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 771b4e9..bb9cac6 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1467,22 +1467,6 @@
 	.globl  a2_tlbinit_after_iprot_flush
 a2_tlbinit_after_iprot_flush:
 
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-	/* Now establish early debug mappings if applicable */
-	/* Restore the MAS0 we used for linear mapping load */
-	mtspr	SPRN_MAS0,r11
-
-	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
-	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
-	mtspr	SPRN_MAS1,r3
-	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
-	mtspr	SPRN_MAS2,r3
-	LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
-	mtspr	SPRN_MAS7_MAS3,r3
-	/* re-use the MAS8 value from the linear mapping */
-	tlbwe
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
-
 	PPC_TLBILX(0,0,R0)
 	sync
 	isync
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 20f11eb..a7d36b1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -439,9 +439,9 @@
 	 * R9		= CR
 	 * Original R9 to R13 is saved on PACA_EXMC
 	 *
-	 * Switch to mc_emergency stack and handle re-entrancy (though we
-	 * currently don't test for overflow). Save MCE registers srr1,
-	 * srr0, dar and dsisr and then set ME=1
+	 * Switch to mc_emergency stack and handle re-entrancy (we limit
+	 * nested MCEs to level 4 to avoid stack overflow).
+	 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
 	 *
 	 * We use paca->in_mce to check whether this is the first entry or
 	 * nested machine check. We increment paca->in_mce to track nested
@@ -464,6 +464,9 @@
 0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
 	addi	r10,r10,1		/* increment paca->in_mce */
 	sth	r10,PACA_IN_MCE(r13)
+	/* Limit nested MCE to level 4 to avoid stack overflow */
+	cmpwi	r10,4
+	bgt	2f			/* Check if we hit limit of 4 */
 	std	r11,GPR1(r1)		/* Save r1 on the stack. */
 	std	r11,0(r1)		/* make stack chain pointer */
 	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
@@ -482,10 +485,23 @@
 	ori	r11,r11,MSR_RI		/* turn on RI bit */
 	ld	r12,PACAKBASE(r13)	/* get high part of &label */
 	LOAD_HANDLER(r12, machine_check_handle_early)
-	mtspr	SPRN_SRR0,r12
+1:	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r11
 	rfid
 	b	.	/* prevent speculative execution */
+2:
+	/* Stack overflow. Stay on emergency stack and panic.
+	 * Keep the ME bit off while panicking, so that if we hit
+	 * another machine check we checkstop.
+	 */
+	addi	r1,r1,INT_FRAME_SIZE	/* go back to previous stack frame */
+	ld	r11,PACAKMSR(r13)
+	ld	r12,PACAKBASE(r13)
+	LOAD_HANDLER(r12, unrecover_mce)
+	li	r10,MSR_ME
+	andc	r11,r11,r10		/* Turn off MSR_ME */
+	b	1b
+	b	.	/* prevent speculative execution */
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
 machine_check_pSeries:
@@ -1389,6 +1405,7 @@
 	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	machine_check_early
+	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
@@ -1443,11 +1460,33 @@
 	 */
 	andi.	r11,r12,MSR_RI
 	bne	2f
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unrecoverable_exception
-	b	1b
+1:	mfspr	r11,SPRN_SRR0
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10,unrecover_mce)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
+	/*
+	 * We are going down, but there is a chance that we might get
+	 * hit by another MCE on the panic path and end up in an
+	 * unstable state with no way out. Hence, turn the ME bit off
+	 * while going down, so that a second MCE makes the system
+	 * checkstop and the hypervisor gets restarted cleanly by the SP.
+	 */
+	li	r3,MSR_ME
+	andc	r10,r10,r3		/* Turn off MSR_ME */
+	mtspr	SPRN_SRR1,r10
+	rfid
+	b	.
 2:
 	/*
+	 * Check if we have successfully handled/recovered from the
+	 * error; if not, stay on the emergency stack and panic.
+	 */
+	ld	r3,RESULT(r1)	/* Load result */
+	cmpdi	r3,0		/* see if we handled MCE successfully */
+
+	beq	1b		/* if !handled then panic */
+	/*
 	 * Return from MC interrupt.
 	 * Queue up the MCE event so that we can log it later, while
 	 * returning from kernel or opal call.
@@ -1460,6 +1499,17 @@
 	MACHINE_CHECK_HANDLER_WINDUP
 	b	machine_check_pSeries
 
+unrecover_mce:
+	/* Invoke machine_check_exception to print MCE event and panic. */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	machine_check_exception
+	/*
+	 * We will not reach here. Even if we did, there is no way out. Call
+	 * unrecoverable_exception and die.
+	 */
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	unrecoverable_exception
+	b	1b
 /*
  * r13 points to the PACA, r9 contains the saved CR,
  * r12 contain the saved SRR1, SRR0 is still ready for return
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 67ee0d6..7d7d863 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -930,25 +930,6 @@
 	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */
 	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */
 
-#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
-
-	/* Load a TLB entry for the UART, so that ppc4xx_progress() can use
-	 * the UARTs nice and early.  We use a 4k real==virtual mapping. */
-
-	lis	r3,SERIAL_DEBUG_IO_BASE@h
-	ori	r3,r3,SERIAL_DEBUG_IO_BASE@l
-	mr	r4,r3
-	clrrwi	r4,r4,12
-	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
-
-	clrrwi	r3,r3,12
-	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
-
-	li	r0,0			/* TLB slot 0 */
-	tlbwe	r4,r0,TLB_DATA
-	tlbwe	r3,r0,TLB_TAG
-#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
-
 	isync
 
 	/* Establish the exception vector base
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8a1edbe..be99774 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -755,15 +755,15 @@
 
 	WARN_ON(!irqs_disabled());
 
-	/* Back up the TAR across context switches.
+	/* Back up the TAR and DSCR across context switches.
 	 * Note that the TAR is not available for use in the kernel.  (To
 	 * provide this, the TAR should be backed up/restored on exception
 	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
 	 * pt_regs anyway (for debug).)
-	 * Save the TAR here before we do treclaim/trecheckpoint as these
-	 * will change the TAR.
+	 * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
+	 * these will change them.
 	 */
-	save_tar(&prev->thread);
+	save_early_sprs(&prev->thread);
 
 	__switch_to_tm(prev);
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d4d4183..e239df3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -471,7 +471,7 @@
 		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, be32_to_cpu(intserv[j]));
-			set_cpu_present(cpu, true);
+			set_cpu_present(cpu, of_device_is_available(dn));
 			set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
 			set_cpu_possible(cpu, true);
 			cpu++;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 7753af2..51a3ff7 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -749,7 +749,7 @@
 /* cpumask of CPUs with asymmetric SMT dependency */
 static const int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7e711bd..9fff9cd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -551,7 +551,7 @@
 	may_hard_irq_enable();
 
 
-#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 #endif
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1bd7ca2..239f1cd 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -295,6 +295,8 @@
 {
 	long handled = 0;
 
+	__get_cpu_var(irq_stat).mce_exceptions++;
+
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a158375..b7aa072 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,8 +62,6 @@
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
-	udbg_init_wsp();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
 	/* In memory console */
 	udbg_init_memcons();
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 75702e2..6e7c492 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -296,14 +296,3 @@
 }
 
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
-
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-
-void __init udbg_init_wsp(void)
-{
-	udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1);
-	udbg_uart_setup(57600, 50000000);
-}
-
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 768a9f9..3a5c568 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -113,10 +113,8 @@
 	 * We assume that if the condition is recovered then linux host
 	 * will have generated an error log event that we will pick
 	 * up and log later.
-	 * Don't release mce event now. In case if condition is not
-	 * recovered we do guest exit and go back to linux host machine
-	 * check handler. Hence we need make sure that current mce event
-	 * is available for linux host to consume.
+	 * Don't release the mce event now. We will queue up the event
+	 * so that we can log the MCE event info on the host console.
 	 */
 	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
 		goto out;
@@ -128,11 +126,12 @@
 
 out:
 	/*
-	 * If we have handled the error, then release the mce event because
-	 * we will be delivering machine check to guest.
+	 * We are now going to enter the guest, either through the machine
+	 * check interrupt (for unhandled errors) or by continuing from the
+	 * current HSRR0 (for handled errors). Hence, queue up the event so
+	 * that we can log it from the host console later.
 	 */
-	if (handled)
-		release_mce_event();
+	machine_check_queue_event();
 
 	return handled;
 }
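
The host-side counterpart of this change is the queued-event path:
events pinned by get_mce_event() are logged and released later in
host context. A minimal sketch of that flow (the drain call is the
mce.c API; the wrapper itself is illustrative):

	/* In host context, drain and log the MCE events that the
	 * real-mode handler queued with machine_check_queue_event(). */
	static void host_log_queued_mce(void)
	{
		machine_check_process_queued_event(NULL);
	}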
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 77356fd..868347e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2257,15 +2257,28 @@
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
-	cmpdi	r3, 0		/* continue exiting from guest? */
+	cmpdi	r3, 0		/* Did we handle the MCE? */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	beq	mc_cont
+	/*
+	 * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest through
+	 * a machine check interrupt (set HSRR0 to 0x200). For handled
+	 * (non-fatal) errors, just go back to guest execution at the current
+	 * HSRR0 instead of exiting the guest. This new approach injects a
+	 * machine check into the guest for fatal errors, causing the guest
+	 * to crash.
+	 *
+	 * The old code used to return to the host for unhandled errors,
+	 * which caused the guest to hang with soft lockups inside the guest
+	 * and made it difficult to recover the guest instance.
+	 */
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	bne	2f	/* Continue guest execution. */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
 	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	ld	r11, VCPU_MSR(r9)
 	bl	kvmppc_msr_interrupt
-	b	fast_interrupt_c_return
+2:	b	fast_interrupt_c_return
 
 /*
  * Check the reason we woke from nap, and take appropriate action.
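
Rendered as C, the decision the assembly now makes looks roughly like
this (illustrative pseudocode; resume_guest() and inject_guest_mce()
are hypothetical stand-ins, not KVM APIs):

	static void handle_realmode_mce(struct kvm_vcpu *vcpu)
	{
		long handled = kvmppc_realmode_machine_check(vcpu);

		if (handled)
			resume_guest(vcpu);	/* continue at current HSRR0 */
		else
			inject_guest_mce(vcpu);	/* deliver vector 0x200 */
	}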
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c0511c2..412dd46 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1470,7 +1470,7 @@
 				regs->gpr[rd] = byterev_4(val);
 			goto ldst_done;
 
-#ifdef CONFIG_PPC_CPU
+#ifdef CONFIG_PPC_FPU
 		case 535:	/* lfsx */
 		case 567:	/* lfsux */
 			if (!(regs->msr & MSR_FP))
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index bf9c6d4..391b3f6 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -19,7 +19,6 @@
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
-source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 43b65ad..a41bd02 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -148,10 +148,6 @@
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
-config PPC_A2
-	bool
-	depends on PPC_BOOK3E_64
-
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -280,7 +276,7 @@
 
 config PPC_ICSWX
 	bool "Support for PowerPC icswx coprocessor instruction"
-	depends on POWER4 || PPC_A2
+	depends on POWER4
 	default n
 	---help---
 
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 879b4a4..469ef17 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,4 +22,3 @@
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
-obj-$(CONFIG_PPC_WSP)		+= wsp/
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 0ba3c95..bcfd6f0 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -35,7 +35,6 @@
 #define SPUFS_PS_MAP_SIZE	0x20000
 #define SPUFS_MFC_MAP_SIZE	0x1000
 #define SPUFS_CNTL_MAP_SIZE	0x1000
-#define SPUFS_CNTL_MAP_SIZE	0x1000
 #define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
 #define SPUFS_MSS_MAP_SIZE	0x1000
 
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index c252ee9..45a8ed0 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -17,6 +17,7 @@
 	select CPU_FREQ_GOV_USERSPACE
 	select CPU_FREQ_GOV_ONDEMAND
 	select CPU_FREQ_GOV_CONSERVATIVE
+	select PPC_DOORBELL
 	default y
 
 config PPC_POWERNV_RTAS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 4ad0d34..d55891f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,9 +1,9 @@
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
-obj-y			+= opal-msglog.o subcore.o subcore-asm.o
+obj-y			+= opal-msglog.o
 
-obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 753f08e..8ad0c5b 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -267,7 +267,7 @@
 {
 	s64 ret = 0;
 	u8 fstate;
-	u16 pcierr;
+	__be16 pcierr;
 	u32 pe_no;
 	int result;
 	struct pci_controller *hose = pe->phb;
@@ -316,7 +316,7 @@
 		result = 0;
 		result &= ~EEH_STATE_RESET_ACTIVE;
 
-		if (pcierr != OPAL_EEH_PHB_ERROR) {
+		if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
 			result |= EEH_STATE_MMIO_ACTIVE;
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
@@ -705,18 +705,19 @@
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
-	struct eeh_pe *phb_pe;
-	u64 frozen_pe_no;
-	u16 err_type, severity;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	long rc;
-	int ret = EEH_NEXT_ERR_NONE;
+	int state, ret = EEH_NEXT_ERR_NONE;
 
 	/*
 	 * While running here, it's safe to purge the event queue.
 	 * And we should keep the cached OPAL notifier event synchronized
 	 * between the kernel and firmware.
 	 */
-	eeh_remove_event(NULL);
+	eeh_remove_event(NULL, false);
 	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
 	list_for_each_entry(hose, &hose_list, list_node) {
@@ -742,8 +743,8 @@
 		}
 
 		/* If the PHB doesn't have error, stop processing */
-		if (err_type == OPAL_EEH_NO_ERROR ||
-		    severity == OPAL_EEH_SEV_NO_ERROR) {
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
 			pr_devel("%s: No error found on PHB#%x\n",
 				 __func__, hose->global_number);
 			continue;
@@ -755,14 +756,14 @@
 		 * specific PHB.
 		 */
 		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
-			 __func__, err_type, severity,
-			 frozen_pe_no, hose->global_number);
-		switch (err_type) {
+			 __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
+			 be64_to_cpu(frozen_pe_no), hose->global_number);
+		switch (be16_to_cpu(err_type)) {
 		case OPAL_EEH_IOC_ERROR:
-			if (severity == OPAL_EEH_SEV_IOC_DEAD) {
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
 				pr_err("EEH: dead IOC detected\n");
 				ret = EEH_NEXT_ERR_DEAD_IOC;
-			} else if (severity == OPAL_EEH_SEV_INF) {
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: IOC informative error "
 					"detected\n");
 				ioda_eeh_hub_diag(hose);
@@ -771,20 +772,26 @@
 
 			break;
 		case OPAL_EEH_PHB_ERROR:
-			if (severity == OPAL_EEH_SEV_PHB_DEAD) {
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
 				*pe = phb_pe;
-				pr_err("EEH: dead PHB#%x detected\n",
-					hose->global_number);
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_DEAD_PHB;
-			} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
+			} else if (be16_to_cpu(severity) ==
+						OPAL_EEH_SEV_PHB_FENCED) {
 				*pe = phb_pe;
-				pr_err("EEH: fenced PHB#%x detected\n",
-					hose->global_number);
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_FENCED_PHB;
-			} else if (severity == OPAL_EEH_SEV_INF) {
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
 				pr_info("EEH: PHB#%x informative error "
-					"detected\n",
-					hose->global_number);
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
 				ioda_eeh_phb_diag(hose);
 				ret = EEH_NEXT_ERR_NONE;
 			}
@@ -792,34 +799,33 @@
 			break;
 		case OPAL_EEH_PE_ERROR:
 			/*
-			 * If we can't find the corresponding PE, the
-			 * PEEV / PEST would be messy. So we force an
-			 * fenced PHB so that it can be recovered.
-			 *
-			 * If the PE has been marked as isolated, that
-			 * should have been removed permanently or in
-			 * progress with recovery. We needn't report
-			 * it again.
+			 * If we can't find the corresponding PE, we
+			 * just try to unfreeze it.
 			 */
-			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
-				*pe = phb_pe;
-				pr_err("EEH: Escalated fenced PHB#%x "
-				       "detected for PE#%llx\n",
-					hose->global_number,
-					frozen_pe_no);
-				ret = EEH_NEXT_ERR_FENCED_PHB;
+			if (ioda_eeh_get_pe(hose,
+					    be64_to_cpu(frozen_pe_no), pe)) {
+				/* Try our best to clear it */
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number,
+					be64_to_cpu(frozen_pe_no));
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+					be64_to_cpu(frozen_pe_no),
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
 			} else if ((*pe)->state & EEH_PE_ISOLATED) {
 				ret = EEH_NEXT_ERR_NONE;
 			} else {
 				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
 					(*pe)->addr, (*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, PHB location: %s\n",
+					eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
 				ret = EEH_NEXT_ERR_FROZEN_PE;
 			}
 
 			break;
 		default:
 			pr_warn("%s: Unexpected error type %d\n",
-				__func__, err_type);
+				__func__, be16_to_cpu(err_type));
 		}
 
 		/*
@@ -837,6 +843,31 @@
 		}
 
 		/*
+		 * We probably have a frozen parent PE out there, and
+		 * we need to handle the frozen parent PE first.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE? */
+				state = ioda_eeh_get_state(parent_pe);
+				if (state > 0 &&
+				    (state & active_flags) != active_flags)
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We may have migrated to another PE */
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+		}
+
+		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
 		 * Otherwise, we need actions to be taken by upper
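
All of these conversions enforce one convention: OPAL returns its
data big-endian regardless of kernel endianness, so out-parameters
are typed __be16/__be64 (which sparse can check) and swapped exactly
once at the point of use. A minimal sketch of the pattern (the helper
is illustrative):

	#include <asm/byteorder.h>

	/* be16_to_cpu() is a no-op on a big-endian kernel and a
	 * byteswap on a little-endian one. */
	static u16 fw_error_type(const __be16 *raw)
	{
		return be16_to_cpu(*raw);
	}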
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 1bb25b9..44ed78a 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -37,7 +37,8 @@
 {
 	struct memcons *mc = bin_attr->private;
 	const char *conbuf;
-	size_t ret, first_read = 0;
+	ssize_t ret;
+	size_t first_read = 0;
 	uint32_t out_pos, avail;
 
 	if (!mc)
@@ -69,6 +70,9 @@
 		to += first_read;
 		count -= first_read;
 		pos -= avail;
+
+		if (count <= 0)
+			goto out;
 	}
 
 	/* Sanity check. The firmware should not do this to us. */
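
The type change for ret matters because the function can return a
negative errno, and the new bail-out stops the second-chunk copy once
the wrapped first chunk has satisfied the whole request. A two-line
illustration of the signedness bug, assuming 64-bit size_t:

	size_t bad = -EINVAL;	/* wraps to a huge positive count, so the
				 * caller would see a "successful" read */
	ssize_t good = -EINVAL;	/* stays negative, reported as an error */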
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index d202f9b..9d1acf2 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -260,10 +260,10 @@
 			attr[i].kobj_attr.attr.mode = S_IRUGO;
 			break;
 		case OPAL_SYSPARAM_WRITE:
-			attr[i].kobj_attr.attr.mode = S_IWUGO;
+			attr[i].kobj_attr.attr.mode = S_IWUSR;
 			break;
 		case OPAL_SYSPARAM_RW:
-			attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO;
+			attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
 			break;
 		default:
 			break;
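
For reference, the permission constants involved, which is why the
write bit tightens from world-writable to owner-only:

	/* S_IRUGO           = 0444  read for user/group/other
	 * S_IWUGO           = 0222  write for user/group/other
	 *                           (too broad for an OPAL tunable)
	 * S_IWUSR           = 0200  write for the owner only
	 * S_IRUGO | S_IWUSR = 0644  the new OPAL_SYSPARAM_RW mode */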
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index eefbfcc..f91a4e5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -206,72 +206,91 @@
 
 	data = (struct OpalIoPhb3ErrorData*)common;
 	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
-		hose->global_number, common->version);
+		hose->global_number, be32_to_cpu(common->version));
 	if (data->brdgCtl)
 		pr_info("brdgCtl:     %08x\n",
-			data->brdgCtl);
+			be32_to_cpu(data->brdgCtl));
 	if (data->portStatusReg || data->rootCmplxStatus ||
 	    data->busAgentStatus)
 		pr_info("UtlSts:      %08x %08x %08x\n",
-			data->portStatusReg, data->rootCmplxStatus,
-			data->busAgentStatus);
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
 	if (data->deviceStatus || data->slotStatus   ||
 	    data->linkStatus   || data->devCmdStatus ||
 	    data->devSecStatus)
 		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
-			data->deviceStatus, data->slotStatus,
-			data->linkStatus, data->devCmdStatus,
-			data->devSecStatus);
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
 	if (data->rootErrorStatus || data->uncorrErrorStatus ||
 	    data->corrErrorStatus)
 		pr_info("RootErrSts:  %08x %08x %08x\n",
-			data->rootErrorStatus, data->uncorrErrorStatus,
-			data->corrErrorStatus);
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
 	if (data->tlpHdr1 || data->tlpHdr2 ||
 	    data->tlpHdr3 || data->tlpHdr4)
 		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
-			data->tlpHdr1, data->tlpHdr2,
-			data->tlpHdr3, data->tlpHdr4);
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
 	if (data->sourceId || data->errorClass ||
 	    data->correlator)
 		pr_info("RootErrLog1: %08x %016llx %016llx\n",
-			data->sourceId, data->errorClass,
-			data->correlator);
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
 	if (data->nFir)
 		pr_info("nFir:        %016llx %016llx %016llx\n",
-			data->nFir, data->nFirMask,
-			data->nFirWOF);
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
 	if (data->phbPlssr || data->phbCsr)
 		pr_info("PhbSts:      %016llx %016llx\n",
-			data->phbPlssr, data->phbCsr);
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
 	if (data->lemFir)
 		pr_info("Lem:         %016llx %016llx %016llx\n",
-			data->lemFir, data->lemErrorMask,
-			data->lemWOF);
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
 	if (data->phbErrorStatus)
 		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
-			data->phbErrorStatus, data->phbFirstErrorStatus,
-			data->phbErrorLog0, data->phbErrorLog1);
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
 	if (data->mmioErrorStatus)
 		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
-			data->mmioErrorStatus, data->mmioFirstErrorStatus,
-			data->mmioErrorLog0, data->mmioErrorLog1);
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
 	if (data->dma0ErrorStatus)
 		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
-			data->dma0ErrorLog0, data->dma0ErrorLog1);
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
 	if (data->dma1ErrorStatus)
 		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
-			data->dma1ErrorLog0, data->dma1ErrorLog1);
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
-		if ((data->pestA[i] >> 63) == 0 &&
-		    (data->pestB[i] >> 63) == 0)
+		if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
+		    (be64_to_cpu(data->pestB[i]) >> 63) == 0)
 			continue;
 
 		pr_info("PE[%3d] A/B: %016llx %016llx\n",
-			i, data->pestA[i], data->pestB[i]);
+			i, be64_to_cpu(data->pestA[i]),
+			be64_to_cpu(data->pestB[i]));
 	}
 }
 
@@ -284,7 +303,7 @@
 		return;
 
 	common = (struct OpalIoPhbErrorCommon *)log_buff;
-	switch (common->ioType) {
+	switch (be32_to_cpu(common->ioType)) {
 	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
 		pnv_pci_dump_p7ioc_diag_data(hose, common);
 		break;
@@ -293,7 +312,7 @@
 		break;
 	default:
 		pr_warn("%s: Unrecognized ioType %d\n",
-			__func__, common->ioType);
+			__func__, be32_to_cpu(common->ioType));
 	}
 }
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 8c16a5f..d9b88fa 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -35,11 +35,14 @@
 #include <asm/rtas.h>
 #include <asm/opal.h>
 #include <asm/kexec.h>
+#include <asm/smp.h>
 
 #include "powernv.h"
 
 static void __init pnv_setup_arch(void)
 {
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
 	/* Initialize SMP */
 	pnv_smp_init();
 
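
set_arch_panic_timeout() only installs the 10-second timeout when the
user has not already configured one; a sketch of its semantics as
defined in include/linux/kernel.h, with ARCH_PANIC_TIMEOUT acting as
the arch-default sentinel:

	static inline void set_arch_panic_timeout(int timeout,
						  int arch_default_timeout)
	{
		/* The arch default only wins if the user (e.g. panic= on
		 * the command line) hasn't chosen a timeout already. */
		if (panic_timeout == arch_default_timeout)
			panic_timeout = timeout;
	}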
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 0062a43..5fcfcf4 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -32,6 +32,7 @@
 #include <asm/opal.h>
 #include <asm/runlatch.h>
 #include <asm/code-patching.h>
+#include <asm/dbell.h>
 
 #include "powernv.h"
 
@@ -46,6 +47,11 @@
 {
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
+
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		doorbell_setup_this_cpu();
+#endif
 }
 
 int pnv_smp_kick_cpu(int nr)
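
With PPC_DOORBELL selected and CPU_FTR_DBELL present, IPIs between
sibling threads can be sent with the msgsnd doorbell instruction
instead of going through the XICS presentation controller. A hedged
sketch of the dispatch pattern, modelled on the pseries variant (the
wrapper name is illustrative):

	#include <asm/dbell.h>
	#include <asm/smp.h>
	#include <asm/xics.h>

	static void cause_ipi(int cpu, unsigned long data)
	{
		/* Doorbells only reach sibling threads of this core. */
		if (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))
			doorbell_cause_ipi(cpu, data);
		else
			icp_ops->cause_ipi(cpu, data);	/* XICS fallback */
	}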
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2cb8b77..756b482 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -21,6 +21,7 @@
 	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
 	select ARCH_RANDOM
+	select PPC_DOORBELL
 	default y
 
 config PPC_SPLPAR
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
deleted file mode 100644
index 422a175..0000000
--- a/arch/powerpc/platforms/wsp/Kconfig
+++ /dev/null
@@ -1,30 +0,0 @@
-config PPC_WSP
-	bool
-	select PPC_A2
-	select GENERIC_TBSYNC
-	select PPC_ICSWX
-	select PPC_SCOM
-	select PPC_XICS
-	select PPC_ICP_NATIVE
-	select PCI
-	select PPC_IO_WORKAROUNDS if PCI
-	select PPC_INDIRECT_PIO if PCI
-	default n
-
-menu "WSP platform selection"
-	depends on PPC_BOOK3E_64
-
-config PPC_PSR2
-	bool "PowerEN System Reference Platform 2"
-	select EPAPR_BOOT
-	select PPC_WSP
-	default y
-
-config PPC_CHROMA
-	bool "PowerEN PCIe Chroma Card"
-	select EPAPR_BOOT
-	select PPC_WSP
-	select OF_DYNAMIC
-	default y
-
-endmenu
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
deleted file mode 100644
index 162fc60..0000000
--- a/arch/powerpc/platforms/wsp/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-ccflags-y			+= $(NO_MINIMAL_TOC)
-
-obj-y				+= setup.o ics.o wsp.o
-obj-$(CONFIG_PPC_PSR2)		+= psr2.o
-obj-$(CONFIG_PPC_CHROMA)	+= chroma.o h8.o
-obj-$(CONFIG_PPC_WSP)		+= opb_pic.o
-obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
-obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
-obj-$(CONFIG_PCI)		+= wsp_pci.o
-obj-$(CONFIG_PCI_MSI)		+= msi.o
diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c
deleted file mode 100644
index aaa46b3..0000000
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-#include <linux/of_fdt.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-void __init chroma_setup_arch(void)
-{
-	wsp_setup_arch();
-	wsp_setup_h8();
-
-}
-
-static int __init chroma_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(chroma_md) {
-	.name			= "Chroma PCIe",
-	.probe			= chroma_probe,
-	.setup_arch		= chroma_setup_arch,
-	.restart		= wsp_h8_restart,
-	.power_off		= wsp_h8_power_off,
-	.halt			= wsp_halt,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(chroma_md, wsp_probe_devices);
diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c
deleted file mode 100644
index a3c87f3..0000000
--- a/arch/powerpc/platforms/wsp/h8.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/io.h>
-#include <linux/of_address.h>
-
-#include "wsp.h"
-
-/*
- * The UART connection to the H8 is over ttyS1 which is just a 16550.
- * We assume that FW has it setup right and no one messes with it.
- */
-
-
-static u8 __iomem *h8;
-
-#define RBR 0		/* Receiver Buffer Register */
-#define THR 0		/* Transmitter Holding Register */
-#define LSR 5		/* Line Status Register */
-#define LSR_DR 0x01	/* LSR value for Data-Ready */
-#define LSR_THRE 0x20	/* LSR value for Transmitter-Holding-Register-Empty */
-static void wsp_h8_putc(int c)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_THRE) != LSR_THRE);
-	writeb(c, h8 + THR);
-}
-
-static int wsp_h8_getc(void)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_DR) != LSR_DR);
-
-	return readb(h8 + RBR);
-}
-
-static void wsp_h8_puts(const char *s, int sz)
-{
-	int i;
-
-	for (i = 0; i < sz; i++) {
-		wsp_h8_putc(s[i]);
-
-		/* no flow control so wait for echo */
-		wsp_h8_getc();
-	}
-	wsp_h8_putc('\r');
-	wsp_h8_putc('\n');
-}
-
-static void wsp_h8_terminal_cmd(const char *cmd, int sz)
-{
-	hard_irq_disable();
-	wsp_h8_puts(cmd, sz);
-	/* should never return, but just in case */
-	for (;;)
-		continue;
-}
-
-
-void wsp_h8_restart(char *cmd)
-{
-	static const char restart[] = "warm-reset";
-
-	(void)cmd;
-	wsp_h8_terminal_cmd(restart, sizeof(restart) - 1);
-}
-
-void wsp_h8_power_off(void)
-{
-	static const char off[] = "power-off";
-
-	wsp_h8_terminal_cmd(off, sizeof(off) - 1);
-}
-
-static void __iomem *wsp_h8_getaddr(void)
-{
-	struct device_node *aliases;
-	struct device_node *uart;
-	struct property *path;
-	void __iomem *va = NULL;
-
-	/*
-	 * there is nothing in the devtree to tell us which is mapped
-	 * to the H8, but se know it is the second serial port.
-	 */
-
-	aliases = of_find_node_by_path("/aliases");
-	if (aliases == NULL)
-		return NULL;
-
-	path = of_find_property(aliases, "serial1", NULL);
-	if (path == NULL)
-		goto out;
-
-	uart = of_find_node_by_path(path->value);
-	if (uart == NULL)
-		goto out;
-
-	va = of_iomap(uart, 0);
-
-	/* remove it so no one messes with it */
-	of_detach_node(uart);
-	of_node_put(uart);
-
-out:
-	of_node_put(aliases);
-
-	return va;
-}
-
-void __init wsp_setup_h8(void)
-{
-	h8 = wsp_h8_getaddr();
-
-	/* Devtree change? lets hard map it anyway */
-	if (h8 == NULL) {
-		pr_warn("UART to H8 could not be found");
-		h8 = ioremap(0xffc0008000ULL, 0x100);
-	}
-}
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
deleted file mode 100644
index 9cd92e6..0000000
--- a/arch/powerpc/platforms/wsp/ics.c
+++ /dev/null
@@ -1,762 +0,0 @@
-/*
- * Copyright 2008-2011 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/xics.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-
-/* WSP ICS */
-
-struct wsp_ics {
-	struct ics ics;
-	struct device_node *dn;
-	void __iomem *regs;
-	spinlock_t lock;
-	unsigned long *bitmap;
-	u32 chip_id;
-	u32 lsi_base;
-	u32 lsi_count;
-	u64 hwirq_start;
-	u64 count;
-#ifdef CONFIG_SMP
-	int *hwirq_cpu_map;
-#endif
-};
-
-#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
-
-#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
-#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
-#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
-#define XIVE_UPDATE_REG(base)		((base) + 0x28)
-#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
-
-#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
-#define TBL_SELECT_XIST		(1UL << 48)
-#define TBL_SELECT_XIVT		(1UL << 49)
-
-#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
-
-#define XIST_REQUIRED		0x8
-#define XIST_REJECTED		0x4
-#define XIST_PRESENTED		0x2
-#define XIST_PENDING		0x1
-
-#define XIVE_SERVER_SHIFT	42
-#define XIVE_SERVER_MASK	0xFFFFULL
-#define XIVE_PRIORITY_MASK	0xFFULL
-#define XIVE_PRIORITY_SHIFT	32
-#define XIVE_WRITE_ENABLE	(1ULL << 63)
-
-/*
- * The docs refer to a 6 bit field called ChipID, which consists of a
- * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
- * so we ignore it, and every where we use "chip id" in this code we
- * mean the NodeID.
- */
-#define WSP_ICS_CHIP_SHIFT		17
-
-
-static struct wsp_ics *ics_list;
-static int num_ics;
-
-/* ICS Source controller accessors */
-
-static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
-{
-	unsigned long flags;
-	u64 xive;
-
-	spin_lock_irqsave(&ics->lock, flags);
-	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
-	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
-	spin_unlock_irqrestore(&ics->lock, flags);
-
-	return xive;
-}
-
-static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
-{
-	xive &= ~XIVE_ADDR_MASK;
-	xive |= (irq & XIVE_ADDR_MASK);
-	xive |= XIVE_WRITE_ENABLE;
-
-	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
-}
-
-static u64 xive_set_server(u64 xive, unsigned int server)
-{
-	u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
-
-	xive &= mask;
-	xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
-
-	return xive;
-}
-
-static u64 xive_set_priority(u64 xive, unsigned int priority)
-{
-	u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
-
-	xive &= mask;
-	xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
-
-	return xive;
-}
-
-
-#ifdef CONFIG_SMP
-/* Find logical CPUs within mask on a given chip and store result in ret */
-void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
-{
-	int cpu, chip;
-	struct device_node *cpu_dn, *dn;
-	const u32 *prop;
-
-	cpumask_clear(ret);
-	for_each_cpu(cpu, mask) {
-		cpu_dn = of_get_cpu_node(cpu, NULL);
-		if (!cpu_dn)
-			continue;
-
-		prop = of_get_property(cpu_dn, "at-node", NULL);
-		if (!prop) {
-			of_node_put(cpu_dn);
-			continue;
-		}
-
-		dn = of_find_node_by_phandle(*prop);
-		of_node_put(cpu_dn);
-
-		chip = wsp_get_chip_id(dn);
-		if (chip == chip_id)
-			cpumask_set_cpu(cpu, ret);
-
-		of_node_put(dn);
-	}
-}
-
-/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	cpumask_var_t avail, newmask;
-	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
-	int index = hwirq - ics->hwirq_start;
-	unsigned int nodeid;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return -ENOMEM;
-
-	if (!distribute_irqs) {
-		ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
-		return 0;
-	}
-
-	/* Allocate needed CPU masks */
-	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
-		goto ret;
-	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
-		goto freeavail;
-
-	/* Find PBus attached to the source of this IRQ */
-	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
-
-	/* Find CPUs that could handle this IRQ */
-	if (affinity)
-		cpumask_and(avail, cpu_online_mask, affinity);
-	else
-		cpumask_copy(avail, cpu_online_mask);
-
-	/* Narrow selection down to logical CPUs on the same chip */
-	cpus_on_chip(nodeid, avail, newmask);
-
-	/* Ensure we haven't narrowed it down to 0 */
-	if (unlikely(cpumask_empty(newmask))) {
-		if (unlikely(cpumask_empty(avail))) {
-			ret = -1;
-			goto out;
-		}
-		cpumask_copy(newmask, avail);
-	}
-
-	/* Choose a CPU out of those we narrowed it down to in round robin */
-	target = hwirq % cpumask_weight(newmask);
-	for_each_cpu(cpu, newmask) {
-		if (cpu_rover++ >= target) {
-			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
-			ret = 0;
-			goto out;
-		}
-	}
-
-	/* Shouldn't happen */
-	WARN_ON(1);
-
-out:
-	free_cpumask_var(newmask);
-freeavail:
-	free_cpumask_var(avail);
-ret:
-	if (ret < 0) {
-		ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
-		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
-			   hwirq, ics->hwirq_cpu_map[index]);
-	}
-	return ret;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics)
-{
-	int i;
-
-	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
-	if (!ics->hwirq_cpu_map) {
-		pr_warning("Allocate hwirq_cpu_map failed, "
-			   "IRQ balancing disabled\n");
-		return;
-	}
-
-	for (i=0; i < ics->count; i++)
-		ics->hwirq_cpu_map[i] = xics_default_server;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	int index = hwirq - ics->hwirq_start;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return xics_default_server;
-
-	return ics->hwirq_cpu_map[index];
-}
-#else /* !CONFIG_SMP */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	return 0;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	return xics_default_server;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics) { }
-#endif
-
-static void wsp_chip_unmask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int server;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return;
-
-	server = get_irq_server(ics, hw_irq);
-
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, server);
-	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static unsigned int wsp_chip_startup(struct irq_data *d)
-{
-	/* unmask it */
-	wsp_chip_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
-{
-	u64 xive;
-
-	if (hw_irq == XICS_IPI)
-		return;
-
-	if (WARN_ON(!ics))
-		return;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, xics_default_server);
-	xive = xive_set_priority(xive, LOWEST_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static void wsp_chip_mask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics = d->chip_data;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	wsp_mask_real_irq(hw_irq, ics);
-}
-
-static int wsp_chip_set_affinity(struct irq_data *d,
-				 const struct cpumask *cpumask, bool force)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int ret;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return -1;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-
-	/*
-	 * For the moment only implement delivery to all cpus or one cpu.
-	 * Get current irq_server for the given irq
-	 */
-	ret = cache_hwirq_map(ics, hw_irq, cpumask);
-	if (ret == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
-			   __func__, cpulist, d->irq);
-		return -1;
-	} else if (ret == -ENOMEM) {
-		pr_warning("%s: Out of memory\n", __func__);
-		return -1;
-	}
-
-	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
-	wsp_ics_set_xive(ics, hw_irq, xive);
-
-	return IRQ_SET_MASK_OK;
-}
-
-static struct irq_chip wsp_irq_chip = {
-	.name = "WSP ICS",
-	.irq_startup		= wsp_chip_startup,
-	.irq_mask		= wsp_chip_mask_irq,
-	.irq_unmask		= wsp_chip_unmask_irq,
-	.irq_set_affinity	= wsp_chip_set_affinity
-};
-
-static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
-{
-	/* All ICSs in the system implement a global irq number space,
-	 * so match against them all. */
-	return of_device_is_compatible(dn, "ibm,ppc-xics");
-}
-
-static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
-{
-	if (hwirq >= wsp_ics->hwirq_start &&
-	    hwirq <  wsp_ics->hwirq_start + wsp_ics->count)
-		return 1;
-
-	return 0;
-}
-
-static int wsp_ics_map(struct ics *ics, unsigned int virq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-	unsigned int hw_irq = virq_to_hw(virq);
-	unsigned long flags;
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
-
-	irq_set_chip_data(virq, wsp_ics);
-
-	spin_lock_irqsave(&wsp_ics->lock, flags);
-	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
-	spin_unlock_irqrestore(&wsp_ics->lock, flags);
-
-	return 0;
-}
-
-static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return;
-
-	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
-	wsp_mask_real_irq(hw_irq, wsp_ics);
-}
-
-static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	return get_irq_server(wsp_ics, hw_irq);
-}
-
-/* HW Number allocation API */
-
-static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
-{
-	struct device_node *iparent;
-	int i;
-
-	iparent = of_irq_find_parent(dn);
-	if (!iparent) {
-		pr_err("wsp_ics: Failed to find interrupt parent!\n");
-		return NULL;
-	}
-
-	for(i = 0; i < num_ics; i++) {
-		if(ics_list[i].dn == iparent)
-			break;
-	}
-
-	if (i >= num_ics) {
-		pr_err("wsp_ics: Unable to find parent bitmap!\n");
-		return NULL;
-	}
-
-	return &ics_list[i];
-}
-
-int wsp_ics_alloc_irq(struct device_node *dn, int num)
-{
-	struct wsp_ics *ics;
-	int order, offset;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (!ics)
-		return -ENODEV;
-
-	/* Fast, but overly strict if num isn't a power of two */
-	order = get_count_order(num);
-
-	spin_lock_irq(&ics->lock);
-	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
-	spin_unlock_irq(&ics->lock);
-
-	if (offset < 0)
-		return offset;
-
-	return offset + ics->hwirq_start;
-}
-
-void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
-{
-	struct wsp_ics *ics;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (WARN_ON(!ics))
-		return;
-
-	spin_lock_irq(&ics->lock);
-	bitmap_release_region(ics->bitmap, irq, 0);
-	spin_unlock_irq(&ics->lock);
-}
-
-/* Initialisation */
-
-static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
-				      struct device_node *dn)
-{
-	int len, i, j, size;
-	u32 start, count;
-	const u32 *p;
-
-	size = BITS_TO_LONGS(ics->count) * sizeof(long);
-	ics->bitmap = kzalloc(size, GFP_KERNEL);
-	if (!ics->bitmap) {
-		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&ics->lock);
-
-	p = of_get_property(dn, "available-ranges", &len);
-	if (!p || !len) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: No available-ranges defined for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	if (len % (2 * sizeof(u32)) != 0) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: Invalid available-ranges for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	bitmap_fill(ics->bitmap, ics->count);
-
-	for (i = 0; i < len / sizeof(u32); i += 2) {
-		start = of_read_number(p + i, 1);
-		count = of_read_number(p + i + 1, 1);
-
-		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
-
-		if ((start + count) > (ics->hwirq_start + ics->count) ||
-		     start < ics->hwirq_start) {
-			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
-					start, start + count);
-			break;
-		}
-
-		for (j = 0; j < count; j++)
-			bitmap_release_region(ics->bitmap,
-				(start + j) - ics->hwirq_start, 0);
-	}
-
-	/* Ensure LSIs are not available for allocation */
-	bitmap_allocate_region(ics->bitmap, ics->lsi_base,
-			       get_count_order(ics->lsi_count));
-
-	return 0;
-}
-
-static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
-{
-	u32 lsi_buid, msi_buid, msi_base, msi_count;
-	void __iomem *regs;
-	const u32 *p;
-	int rc, len, i;
-	u64 caps, buid;
-
-	p = of_get_property(dn, "interrupt-ranges", &len);
-	if (!p || len < (2 * sizeof(u32))) {
-		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
-			dn->full_name);
-		return -ENOENT;
-	}
-
-	if (len > (2 * sizeof(u32))) {
-		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
-		return -EINVAL;
-	}
-
-	regs = of_iomap(dn, 0);
-	if (!regs) {
-		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
-		return -ENXIO;
-	}
-
-	ics->hwirq_start = of_read_number(p, 1);
-	ics->count = of_read_number(p + 1, 1);
-	ics->regs = regs;
-
-	ics->chip_id = wsp_get_chip_id(dn);
-	if (WARN_ON(ics->chip_id < 0))
-		ics->chip_id = 0;
-
-	/* Get some informations about the critter */
-	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
-	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
-	ics->lsi_count = caps >> 56;
-	msi_count = (caps >> 44) & 0x7ff;
-
-	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
-	 * rest is mixed in the interrupt number. We store the whole
-	 * thing though
-	 */
-	lsi_buid = (buid >> 48) & 0x1ff;
-	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
-	msi_buid = (buid >> 37) & 0x7;
-	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
-
-	pr_info("wsp_ics: Found %s\n", dn->full_name);
-	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
-		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
-	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
-		ics->lsi_count, ics->lsi_base,
-		ics->lsi_base + ics->lsi_count - 1);
-	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
-		msi_count, msi_base,
-		msi_base + msi_count - 1);
-
-	/* Let's check the HW config is sane */
-	if (ics->lsi_base < ics->hwirq_start ||
-	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
-	if (msi_base < ics->hwirq_start ||
-	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
-
-	/* We don't check for overlap between LSI and MSI, which will happen
-	 * if we use the same BUID, I'm not sure yet how legit that is.
-	 */
-
-	rc = wsp_ics_bitmap_setup(ics, dn);
-	if (rc) {
-		iounmap(regs);
-		return rc;
-	}
-
-	ics->dn = of_node_get(dn);
-	alloc_irq_map(ics);
-
-	for(i = 0; i < ics->count; i++)
-		wsp_mask_real_irq(ics->hwirq_start + i, ics);
-
-	ics->ics.map = wsp_ics_map;
-	ics->ics.mask_unknown = wsp_ics_mask_unknown;
-	ics->ics.get_server = wsp_ics_get_server;
-	ics->ics.host_match = wsp_ics_host_match;
-
-	xics_register_ics(&ics->ics);
-
-	return 0;
-}
-
-static void __init wsp_ics_set_default_server(void)
-{
-	struct device_node *np;
-	u32 hwid;
-
-	/* Find the server number for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	hwid = get_hard_smp_processor_id(boot_cpuid);
-
-	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
-	xics_default_server = hwid;
-
-	of_node_put(np);
-}
-
-static int __init wsp_ics_init(void)
-{
-	struct device_node *dn;
-	struct wsp_ics *ics;
-	int rc, found;
-
-	wsp_ics_set_default_server();
-
-	found = 0;
-	for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
-		found++;
-
-	if (found == 0) {
-		pr_err("wsp_ics: No ICS's found!\n");
-		return -ENODEV;
-	}
-
-	ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
-	if (!ics_list) {
-		pr_err("wsp_ics: No memory for structs.\n");
-		return -ENOMEM;
-	}
-
-	num_ics = 0;
-	ics = ics_list;
-	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
-		rc = wsp_ics_setup(ics, dn);
-		if (rc == 0) {
-			ics++;
-			num_ics++;
-		}
-	}
-
-	if (found != num_ics) {
-		pr_err("wsp_ics: Failed setting up %d ICS's\n",
-			found - num_ics);
-		return -1;
-	}
-
-	return 0;
-}
-
-void __init wsp_init_irq(void)
-{
-	wsp_ics_init();
-	xics_init();
-
-	/* We need to patch our irq chip's EOI to point to the right ICP */
-	wsp_irq_chip.irq_eoi = icp_ops->eoi;
-}
-
-#ifdef CONFIG_PCI_MSI
-static void wsp_ics_msi_unmask_irq(struct irq_data *d)
-{
-	wsp_chip_unmask_irq(d);
-	unmask_msi_irq(d);
-}
-
-static unsigned int wsp_ics_msi_startup(struct irq_data *d)
-{
-	wsp_ics_msi_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_ics_msi_mask_irq(struct irq_data *d)
-{
-	mask_msi_irq(d);
-	wsp_chip_mask_irq(d);
-}
-
-/*
- * we do it this way because we reassinge default EOI handling in
- * irq_init() above
- */
-static void wsp_ics_eoi(struct irq_data *data)
-{
-	wsp_irq_chip.irq_eoi(data);
-}
-
-static struct irq_chip wsp_ics_msi = {
-	.name = "WSP ICS MSI",
-	.irq_startup = wsp_ics_msi_startup,
-	.irq_mask = wsp_ics_msi_mask_irq,
-	.irq_unmask = wsp_ics_msi_unmask_irq,
-	.irq_eoi = wsp_ics_eoi,
-	.irq_set_affinity = wsp_chip_set_affinity
-};
-
-void wsp_ics_set_msi_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_ics_msi);
-}
-
-void wsp_ics_set_std_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_irq_chip);
-}
-#endif /* CONFIG_PCI_MSI */
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
deleted file mode 100644
index 07b644e..0000000
--- a/arch/powerpc/platforms/wsp/ics.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright 2009 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __ICS_H
-#define __ICS_H
-
-#define XIVE_ADDR_MASK		0x7FFULL
-
-extern void wsp_init_irq(void);
-
-extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
-extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_ics_set_msi_chip(unsigned int irq);
-extern void wsp_ics_set_std_chip(unsigned int irq);
-#endif /* CONFIG_PCI_MSI */
-
-#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c
deleted file mode 100644
index 380882f..0000000
--- a/arch/powerpc/platforms/wsp/msi.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-
-#include "msi.h"
-#include "ics.h"
-#include "wsp_pci.h"
-
-/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */
-#define MSI_ADDR_32		0xFFFF0000ul
-#define MSI_ADDR_64		0x1000000000000000ul
-
-int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	struct msi_msg msg;
-	unsigned int virq;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-	if (!phb)
-		return -ENOENT;
-
-	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
-	if (entry->msi_attrib.is_64) {
-		msg.address_lo = 0;
-		msg.address_hi = MSI_ADDR_64 >> 32;
-	} else {
-		msg.address_lo = MSI_ADDR_32;
-		msg.address_hi = 0;
-	}
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		hwirq = wsp_ics_alloc_irq(phb->dn, 1);
-		if (hwirq < 0) {
-			dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n");
-			return hwirq;
-		}
-
-		virq = irq_create_mapping(NULL, hwirq);
-		if (virq == NO_IRQ) {
-			dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n");
-			return -1;
-		}
-
-		dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n",
-			hwirq, virq);
-
-		wsp_ics_set_msi_chip(virq);
-		irq_set_msi_desc(virq, entry);
-		msg.data = hwirq & XIVE_ADDR_MASK;
-		write_msi_msg(virq, &msg);
-	}
-
-	return 0;
-}
-
-void wsp_teardown_msi_irqs(struct pci_dev *dev)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-
-	dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n");
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		if (entry->irq == NO_IRQ)
-			continue;
-
-		irq_set_msi_desc(entry->irq, NULL);
-		wsp_ics_set_std_chip(entry->irq);
-
-		hwirq = virq_to_hw(entry->irq);
-		/* In this order to avoid racing with irq_create_mapping() */
-		irq_dispose_mapping(entry->irq);
-		wsp_ics_free_irq(phb->dn, hwirq);
-	}
-}
-
-void wsp_setup_phb_msi(struct pci_controller *phb)
-{
-	/* Create a single MVE at offset 0 that matches everything */
-	out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT);
-	out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63);
-
-	ppc_md.setup_msi_irqs = wsp_setup_msi_irqs;
-	ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs;
-}
diff --git a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h
deleted file mode 100644
index 0ab27b7..0000000
--- a/arch/powerpc/platforms/wsp/msi.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_MSI_H
-#define __WSP_MSI_H
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_setup_phb_msi(struct pci_controller *phb);
-#else
-static inline void wsp_setup_phb_msi(struct pci_controller *phb) { }
-#endif
-
-#endif /* __WSP_MSI_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
deleted file mode 100644
index 3f67298..0000000
--- a/arch/powerpc/platforms/wsp/opb_pic.c
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * IBM Onboard Peripheral Bus Interrupt Controller
- *
- * Copyright 2010 Jack Miller, IBM Corporation.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <asm/reg_a2.h>
-#include <asm/irq.h>
-
-#define OPB_NR_IRQS 32
-
-#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
-#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
-#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
-#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
-#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
-
-static int opb_index = 0;
-
-struct opb_pic {
-	struct irq_domain *host;
-	void *regs;
-	int index;
-	spinlock_t lock;
-};
-
-static u32 opb_in(struct opb_pic *opb, int offset)
-{
-	return in_be32(opb->regs + offset);
-}
-
-static void opb_out(struct opb_pic *opb, int offset, u32 val)
-{
-	out_be32(opb->regs + offset, val);
-}
-
-static void opb_unmask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier | bitset);
-	ier = opb_in(opb, OPB_MLSIER);
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, mask;
-
-	opb = d->chip_data;
-	mask = ~(1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & mask);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-	u32 ier, ir;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & ~bitset);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	int invert, ipr, mask, bit;
-
-	opb = d->chip_data;
-
-	/* The only information we're interested in in the type is whether it's
-	 * a high or low trigger. For high triggered interrupts, the polarity
-	 * set for it in the MLS Interrupt Polarity Register is 0, for low
-	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
-	 * Register is interrupted as asserting the interrupt. */
-
-	switch (flow) {
-		case IRQ_TYPE_NONE:
-			opb_mask_irq(d);
-			return 0;
-
-		case IRQ_TYPE_LEVEL_HIGH:
-			invert = 0;
-			break;
-
-		case IRQ_TYPE_LEVEL_LOW:
-			invert = 1;
-			break;
-
-		default:
-			return -EINVAL;
-	}
-
-	bit = (1 << (31 - irqd_to_hwirq(d)));
-	mask = ~bit;
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ipr = opb_in(opb, OPB_MLSIPR);
-	ipr = (ipr & mask) | (invert ? bit : 0);
-	opb_out(opb, OPB_MLSIPR, ipr);
-	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-
-	/* Record the type in the interrupt descriptor */
-	irqd_set_trigger_type(d, flow);
-
-	return 0;
-}
-
-static struct irq_chip opb_irq_chip = {
-	.name		= "OPB",
-	.irq_mask	= opb_mask_irq,
-	.irq_unmask	= opb_unmask_irq,
-	.irq_mask_ack	= opb_mask_ack_irq,
-	.irq_ack	= opb_ack_irq,
-	.irq_set_type	= opb_set_irq_type
-};
-
-static int opb_host_map(struct irq_domain *host, unsigned int virq,
-		irq_hw_number_t hwirq)
-{
-	struct opb_pic *opb;
-
-	opb = host->host_data;
-
-	/* Most of the important stuff is handled by the generic host code, like
-	 * the lookup, so just attach some info to the virtual irq */
-
-	irq_set_chip_data(virq, opb);
-	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
-	irq_set_irq_type(virq, IRQ_TYPE_NONE);
-
-	return 0;
-}
-
-static const struct irq_domain_ops opb_host_ops = {
-	.map = opb_host_map,
-	.xlate = irq_domain_xlate_twocell,
-};
-
-irqreturn_t opb_irq_handler(int irq, void *private)
-{
-	struct opb_pic *opb;
-	u32 ir, src, subvirq;
-
-	opb = (struct opb_pic *) private;
-
-	/* Read the OPB MLS Interrupt Register for
-	 * asserted interrupts */
-	ir = opb_in(opb, OPB_MLSIR);
-	if (!ir)
-		return IRQ_NONE;
-
-	do {
-		/* Get 1 - 32 source, *NOT* bit */
-		src = 32 - ffs(ir);
-
-		/* Translate from the OPB's conception of interrupt number to
-		 * Linux's virtual IRQ */
-
-		subvirq = irq_linear_revmap(opb->host, src);
-
-		generic_handle_irq(subvirq);
-	} while ((ir = opb_in(opb, OPB_MLSIR)));
-
-	return IRQ_HANDLED;
-}
-
-struct opb_pic *opb_pic_init_one(struct device_node *dn)
-{
-	struct opb_pic *opb;
-	struct resource res;
-
-	if (of_address_to_resource(dn, 0, &res)) {
-		printk(KERN_ERR "opb: Couldn't translate resource\n");
-		return  NULL;
-	}
-
-	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
-	if (!opb) {
-		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
-		return NULL;
-	}
-
-	/* Get access to the OPB MMIO registers */
-	opb->regs = ioremap(res.start + 0x10000, 0x1000);
-	if (!opb->regs) {
-		printk(KERN_ERR "opb: Failed to allocate register space!\n");
-		goto free_opb;
-	}
-
-	/* Allocate an irq domain so that Linux knows that despite only
-	 * having one interrupt to issue, we're the controller for multiple
-	 * hardware IRQs, so later we can lookup their virtual IRQs. */
-
-	opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb);
-	if (!opb->host) {
-		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
-		goto free_regs;
-	}
-
-	opb->index = opb_index++;
-	spin_lock_init(&opb->lock);
-
-	/* Disable all interrupts by default */
-	opb_out(opb, OPB_MLSASIER, 0);
-	opb_out(opb, OPB_MLSIER, 0);
-
-	/* ACK any interrupts left by FW */
-	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
-
-	return opb;
-
-free_regs:
-	iounmap(opb->regs);
-free_opb:
-	kfree(opb);
-	return NULL;
-}
-
-void __init opb_pic_init(void)
-{
-	struct device_node *dn;
-	struct opb_pic *opb;
-	int virq;
-	int rc;
-
-	/* Call init_one for each OPB device */
-	for_each_compatible_node(dn, NULL, "ibm,opb") {
-
-		/* Fill in an OPB struct */
-		opb = opb_pic_init_one(dn);
-		if (!opb) {
-			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
-			continue;
-		}
-
-		/* Map / get opb's hardware virtual irq */
-		virq = irq_of_parse_and_map(dn, 0);
-		if (virq <= 0) {
-			printk("opb: irq_op_parse_and_map failed!\n");
-			continue;
-		}
-
-		/* Attach opb interrupt handler to new virtual IRQ */
-		rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD,
-				 "OPB LS Cascade", opb);
-		if (rc) {
-			printk("opb: request_irq failed: %d\n", rc);
-			continue;
-		}
-
-		printk("OPB%d init with %d IRQs at %p\n", opb->index,
-				OPB_NR_IRQS, opb->regs);
-	}
-}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
deleted file mode 100644
index a87b414..0000000
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-#include <linux/of_fdt.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-
-static void psr2_spin(void)
-{
-	hard_irq_disable();
-	for (;;)
-		continue;
-}
-
-static void psr2_restart(char *cmd)
-{
-	psr2_spin();
-}
-
-static int __init psr2_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) {
-		/* chroma systems also claim they are psr2s */
-		return 0;
-	}
-
-	if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(psr2_md) {
-	.name			= "PSR2 A2",
-	.probe			= psr2_probe,
-	.setup_arch		= wsp_setup_arch,
-	.restart		= psr2_restart,
-	.power_off		= psr2_spin,
-	.halt			= psr2_spin,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(psr2_md, wsp_probe_devices);
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
deleted file mode 100644
index 8c79ce0..0000000
--- a/arch/powerpc/platforms/wsp/scom_smp.c
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- * SCOM support for A2 platforms
- *
- * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
- *		       Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-#include <asm/code-patching.h>
-
-#include "wsp.h"
-
-#define SCOM_RAMC		0x2a		/* Ram Command */
-#define SCOM_RAMC_TGT1_EXT	0x80000000
-#define SCOM_RAMC_SRC1_EXT	0x40000000
-#define SCOM_RAMC_SRC2_EXT	0x20000000
-#define SCOM_RAMC_SRC3_EXT	0x10000000
-#define SCOM_RAMC_ENABLE	0x00080000
-#define SCOM_RAMC_THREADSEL	0x00060000
-#define SCOM_RAMC_EXECUTE	0x00010000
-#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
-#define SCOM_RAMC_MSR_PR	0x00004000
-#define SCOM_RAMC_MSR_GS	0x00002000
-#define SCOM_RAMC_FORCE		0x00001000
-#define SCOM_RAMC_FLUSH		0x00000800
-#define SCOM_RAMC_INTERRUPT	0x00000004
-#define SCOM_RAMC_ERROR		0x00000002
-#define SCOM_RAMC_DONE		0x00000001
-#define SCOM_RAMI		0x29		/* Ram Instruction */
-#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
-#define SCOM_RAMIC_INSN		0xffffffff00000000
-#define SCOM_RAMD		0x2d		/* Ram Data */
-#define SCOM_RAMDH		0x2e		/* Ram Data High */
-#define SCOM_RAMDL		0x2f		/* Ram Data Low */
-#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
-#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
-#define SCOM_PCCR0_ENABLE_RAM	0x40000000
-#define SCOM_THRCTL		0x30		/* Thread Control and Status */
-#define SCOM_THRCTL_T0_STOP	0x80000000
-#define SCOM_THRCTL_T1_STOP	0x40000000
-#define SCOM_THRCTL_T2_STOP	0x20000000
-#define SCOM_THRCTL_T3_STOP	0x10000000
-#define SCOM_THRCTL_T0_STEP	0x08000000
-#define SCOM_THRCTL_T1_STEP	0x04000000
-#define SCOM_THRCTL_T2_STEP	0x02000000
-#define SCOM_THRCTL_T3_STEP	0x01000000
-#define SCOM_THRCTL_T0_RUN	0x00800000
-#define SCOM_THRCTL_T1_RUN	0x00400000
-#define SCOM_THRCTL_T2_RUN	0x00200000
-#define SCOM_THRCTL_T3_RUN	0x00100000
-#define SCOM_THRCTL_T0_PM	0x00080000
-#define SCOM_THRCTL_T1_PM	0x00040000
-#define SCOM_THRCTL_T2_PM	0x00020000
-#define SCOM_THRCTL_T3_PM	0x00010000
-#define SCOM_THRCTL_T0_UDE	0x00008000
-#define SCOM_THRCTL_T1_UDE	0x00004000
-#define SCOM_THRCTL_T2_UDE	0x00002000
-#define SCOM_THRCTL_T3_UDE	0x00001000
-#define SCOM_THRCTL_ASYNC_DIS	0x00000800
-#define SCOM_THRCTL_TB_DIS	0x00000400
-#define SCOM_THRCTL_DEC_DIS	0x00000200
-#define SCOM_THRCTL_AND		0x31		/* Thread Control and Status */
-#define SCOM_THRCTL_OR		0x32		/* Thread Control and Status */
-
-
-static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
-
-static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
-{
-	scom_map_t scom = per_cpu(scom_ptrs, cpu);
-	int tcpu;
-
-	if (scom_map_ok(scom)) {
-		*first_thread = 0;
-		return scom;
-	}
-
-	*first_thread = 1;
-
-	scom = scom_map_device(np, 0);
-
-	for (tcpu = cpu_first_thread_sibling(cpu);
-	     tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
-		per_cpu(scom_ptrs, tcpu) = scom;
-
-	/* Hack: for the boot core, this will actually get called on
-	 * the second thread up, not the first so our test above will
-	 * set first_thread incorrectly. */
-	if (cpu_first_thread_sibling(cpu) == 0)
-		*first_thread = 0;
-
-	return scom;
-}
-
-static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
-{
-	u64 cmd, mask, val;
-	int n = 0;
-
-	cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
-		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
-	mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
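-	/* Illustrative example (not in the original source): ramming
-	 * mfspr r1,SRR0 (insn 0x7c3a02a6) on thread 0 with extmask 0xf
-	 * gives cmd = 0x7c3a02a6f0090000: the instruction in the top word,
-	 * 0xf << 28 for the extension mask, thread 0 << 17, plus
-	 * SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE in the low word. */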
-
-	scom_write(scom, SCOM_RAMIC, cmd);
-
-	for (;;) {
-		if (scom_read(scom, SCOM_RAMC, &val) != 0) {
-			pr_err("SCOM error on instruction 0x%08x, thread %d\n",
-			       insn, thread);
-			return -1;
-		}
-		if (val & mask)
-			break;
-		pr_devel("Waiting on RAMC = 0x%llx\n", val);
-		if (++n == 3) {
-			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
-			       insn, thread);
-			return -1;
-		}
-	}
-
-	if (val & SCOM_RAMC_INTERRUPT) {
-		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_INTERRUPT;
-	}
-
-	if (val & SCOM_RAMC_ERROR) {
-		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_ERROR;
-	}
-
-	return 0;
-}
-
-static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
-			  u64 *out_gpr)
-{
-	int rc;
-
-	/* or rN, rN, rN */
-	u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
-	rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
-	if (rc)
-		return rc;
-
-	return scom_read(scom, SCOM_RAMD, out_gpr);
-}
-
-static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
-{
-	int rc, sprhi, sprlo;
-	u32 insn;
-
-	sprhi = spr >> 5;
-	sprlo = spr & 0x1f;
-	insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
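-	/* Illustrative note (not in the original source): the two 5-bit
-	 * halves of the SPR number are swapped in the mfspr encoding, so
-	 * for SRR0 (SPR 26 = 0x1a) sprhi = 0 and sprlo = 0x1a, giving
-	 * insn = 0x7c3a02a6, i.e. mfspr r1,26. */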
-
-	if (spr == 0x0ff0)
-		insn = 0x7c2000a6; /* mfmsr r1 */
-
-	rc = a2_scom_ram(scom, thread, insn, 0xf);
-	if (rc)
-		return rc;
-	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
-}
-
-static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
-			  int alt, u64 val)
-{
-	u32 lis = 0x3c000000 | (gpr << 21);
-	u32 li = 0x38000000 | (gpr << 21);
-	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
-	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
-	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
-	u32 highest = val >> 48;
-	u32 higher = (val >> 32) & 0xffff;
-	u32 high = (val >> 16) & 0xffff;
-	u32 low = val & 0xffff;
-	int lext = alt ? 0x8 : 0x0;
-	int oext = alt ? 0xf : 0x0;
-	int rc = 0;
-
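-	/* Illustrative example (not in the original source): loading
-	 * val = 0x0000000100002000 emits li rN,1 for the "higher" half,
-	 * rldicr32 to shift it up by 32 bits, then ori rN,rN,0x2000 for
-	 * the low half; the zero "highest" and "high" halves are skipped. */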
-	if (highest)
-		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
-
-	if (higher) {
-		if (highest)
-			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, li | higher, lext);
-	}
-
-	if (highest || higher)
-		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
-
-	if (high) {
-		if (highest || higher)
-			rc |= a2_scom_ram(scom, thread, oris | high, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, lis | high, lext);
-	}
-
-	if (highest || higher || high)
-		rc |= a2_scom_ram(scom, thread, ori | low, oext);
-	else
-		rc |= a2_scom_ram(scom, thread, li | low, lext);
-
-	return rc;
-}
-
-static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
-{
-	int sprhi = spr >> 5;
-	int sprlo = spr & 0x1f;
-	/* mtspr spr, r1 */
-	u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
-
-	if (spr == 0x0ff0)
-		insn = 0x7c200124; /* mtmsr r1 */
-
-	if (a2_scom_setgpr(scom, thread, 1, 1, val))
-		return -1;
-
-	return a2_scom_ram(scom, thread, insn, 0xf);
-}
-
-static int a2_scom_initial_tlb(scom_map_t scom, int thread)
-{
-	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
-	extern u32 a2_tlbinit_after_iprot_flush[];
-	extern u32 a2_tlbinit_after_linear_map[];
-	u32 assoc, entries, i;
-	u64 epn, tlbcfg;
-	u32 *p;
-	int rc;
-
-	/* Invalidate all entries (including iprot) */
-
-	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
-	if (rc)
-		goto scom_fail;
-	entries = tlbcfg & TLBnCFG_N_ENTRY;
-	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
-	epn = 0;
-
-	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
-	/* Set MMUCR3 to write all thread ID (thid) bits to the TLB */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
-
-	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
-	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
-	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-	for (i = 0; i < entries; i++) {
-
-		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
-
-		/* tlbwe */
-		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
-		if (rc)
-			goto scom_fail;
-
-		/* Next entry is new address? */
-		if((i + 1) % assoc == 0) {
-			epn += (1 << 30);
-			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-		}
-	}
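-	/* Illustrative note (not in the original source): with e.g. 512
-	 * entries and 4-way associativity, the loop above writes ways
-	 * 0..3 at each EPN and then steps the EPN by 1G, covering 128
-	 * distinct 1G pages. */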
-
-	/* Setup args for linear mapping */
-	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
-	if (rc)
-		goto scom_fail;
-
-	/* Linear mapping */
-	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-	/*
-	 * For the boot thread, between the linear mapping and the debug
-	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
-	 * branches, but the secondary threads don't need to be nearly as smart
-	 * (i.e. we don't need to worry about invalidating the mapping we're
-	 * standing on).
-	 */
-
-	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
-	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-scom_fail:
-	if (rc)
-		pr_err("Setting up initial TLB failed, err %d\n", rc);
-
-	if (rc == -SCOM_RAMC_INTERRUPT) {
-		/* Interrupt, dump some status */
-		int rc[10];
-		u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
-		rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
-		rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
-		rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
-		rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
-		rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
-		rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
-		rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
-		rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
-		rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
-		rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
-		pr_err(" -> retrieved IAR =0x%llx (err %d)\n", iar, rc[0]);
-		pr_err("    retrieved SRR0=0x%llx (err %d)\n", srr0, rc[1]);
-		pr_err("    retrieved SRR1=0x%llx (err %d)\n", srr1, rc[2]);
-		pr_err("    retrieved ESR =0x%llx (err %d)\n", esr, rc[3]);
-		pr_err("    retrieved MAS0=0x%llx (err %d)\n", mas0, rc[4]);
-		pr_err("    retrieved MAS1=0x%llx (err %d)\n", mas1, rc[5]);
-		pr_err("    retrieved MAS2=0x%llx (err %d)\n", mas2, rc[6]);
-		pr_err("    retrieved MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
-		pr_err("    retrieved MAS8=0x%llx (err %d)\n", mas8, rc[8]);
-		pr_err("    retrieved CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
-	}
-
-	return rc;
-}
-
-int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np)
-{
-	u64 init_iar, init_msr, init_ccr2;
-	unsigned long start_here;
-	int rc, core_setup;
-	scom_map_t scom;
-	u64 pccr0;
-
-	scom = get_scom(lcpu, np, &core_setup);
-	if (!scom) {
-		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
-		return -1;
-	}
-
-	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
-
-	if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) {
-		printk(KERN_ERR "XSCOM failure reading PCCR0 on CPU%d\n", lcpu);
-		return -1;
-	}
-	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
-				     SCOM_PCCR0_ENABLE_RAM);
-
-	/* Stop the thread with THRCTL. If we are setting up the TLB we stop all
-	 * threads. We also disable asynchronous interrupts while RAMing.
-	 */
-	if (core_setup)
-		scom_write(scom, SCOM_THRCTL_OR,
-			      SCOM_THRCTL_T0_STOP |
-			      SCOM_THRCTL_T1_STOP |
-			      SCOM_THRCTL_T2_STOP |
-			      SCOM_THRCTL_T3_STOP |
-			      SCOM_THRCTL_ASYNC_DIS);
-	else
-		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
-
-	/* Flush its pipeline just in case */
-	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
-		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
-
-	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
-	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
-	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
-
-	/* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
-	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
-	if (rc) {
-		pr_err("Failed to set MSR! err %d\n", rc);
-		return rc;
-	}
-
-	/* RAM in a sync/isync for the sake of it */
-	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
-	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
-
-	if (core_setup) {
-		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
-			 lcpu);
-		rc = a2_scom_initial_tlb(scom, thr_idx);
-		if (rc)
-			goto fail;
-	}
-
-	start_here = ppc_function_entry(core_setup ? generic_secondary_smp_init
-					: generic_secondary_thread_init);
-	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
-	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
-			     get_hard_smp_processor_id(lcpu));
-	/*
-	 * Tell book3e_secondary_core_init not to set up the TLB, we've
-	 * already done that.
-	 */
-	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
-
-	scom_write(scom, SCOM_RAMC, 0);
-	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
-	scom_write(scom, SCOM_PCCR0, pccr0);
-fail:
-	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
-	if (rc) {
-		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
-		       init_iar, init_msr, init_ccr2);
-	}
-
-	return rc;
-}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
deleted file mode 100644
index 6538b4d..0000000
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  SCOM backend for WSP
- *
- *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/of_address.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-
-#include "wsp.h"
-
-
-static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
-{
-	struct resource r;
-	u64 xscom_addr;
-
-	if (!of_get_property(dev, "scom-controller", NULL)) {
-		pr_err("%s: device %s is not a SCOM controller\n",
-			__func__, dev->full_name);
-		return SCOM_MAP_INVALID;
-	}
-
-	if (of_address_to_resource(dev, 0, &r)) {
-		pr_debug("Failed to find SCOM controller address\n");
-		return 0;
-	}
-
-	/* Transform the SCOM address into an XSCOM offset */
-	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
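-	/* Illustrative example (not in the original source): for
-	 * reg = 0x7f0fffff this gives (0x7f000000 >> 1) | (0xfffff << 3)
-	 * = 0x3f800000 | 0x007ffff8 = 0x3ffffff8. */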
-
-	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
-}
-
-static void wsp_scom_unmap(scom_map_t map)
-{
-	iounmap((void *)map);
-}
-
-static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	*value = in_be64(addr + reg);
-
-	return 0;
-}
-
-static int wsp_scom_write(scom_map_t map, u64 reg, u64 value)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	out_be64(addr + reg, value);
-
-	return 0;
-}
-
-static const struct scom_controller wsp_scom_controller = {
-	.map	= wsp_scom_map,
-	.unmap	= wsp_scom_unmap,
-	.read	= wsp_scom_read,
-	.write	= wsp_scom_write
-};
-
-void scom_init_wsp(void)
-{
-	scom_init(&wsp_scom_controller);
-}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
deleted file mode 100644
index 11ac2f0..0000000
--- a/arch/powerpc/platforms/wsp/setup.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2010 Michael Ellerman, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of_platform.h>
-
-#include "wsp.h"
-
-/*
- * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
- * Won't work for nodes that are not a descendant of a wsp node.
- */
-int wsp_get_chip_id(struct device_node *dn)
-{
-	const u32 *p;
-	int rc;
-
-	/* Start looking at the specified node, not its parent */
-	dn = of_node_get(dn);
-	while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
-		dn = of_get_next_parent(dn);
-
-	if (!dn)
-		return -1;
-
-	rc = *p;
-	of_node_put(dn);
-
-	return rc;
-}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
deleted file mode 100644
index 332a18b..0000000
--- a/arch/powerpc/platforms/wsp/smp.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  SMP Support for A2 platforms
- *
- *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/dbell.h>
-#include <asm/machdep.h>
-#include <asm/xics.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-static void smp_a2_setup_cpu(int cpu)
-{
-	doorbell_setup_this_cpu();
-
-	if (cpu != boot_cpuid)
-		xics_setup_cpu();
-}
-
-int smp_a2_kick_cpu(int nr)
-{
-	const char *enable_method;
-	struct device_node *np;
-	int thr_idx;
-
-	if (nr < 0 || nr >= NR_CPUS)
-		return -ENOENT;
-
-	np = of_get_cpu_node(nr, &thr_idx);
-	if (!np)
-		return -ENODEV;
-
-	enable_method = of_get_property(np, "enable-method", NULL);
-	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
-
-	if (!enable_method) {
-		printk(KERN_ERR "CPU%d has no enable-method\n", nr);
-		return -ENOENT;
-	} else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
-		if (a2_scom_startup_cpu(nr, thr_idx, np))
-			return -1;
-	} else {
-		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
-		       nr, enable_method);
-		return -EINVAL;
-	}
-
-	/*
-	 * The processor is currently spinning, waiting for the
-	 * cpu_start field to become non-zero. After we set cpu_start,
-	 * the processor will continue on to secondary_start.
-	 */
-	paca[nr].cpu_start = 1;
-
-	return 0;
-}
-
-static int __init smp_a2_probe(void)
-{
-	return num_possible_cpus();
-}
-
-static struct smp_ops_t a2_smp_ops = {
-	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
-	.cause_ipi	= doorbell_cause_ipi,
-	.probe		= smp_a2_probe,
-	.kick_cpu	= smp_a2_kick_cpu,
-	.setup_cpu	= smp_a2_setup_cpu,
-};
-
-void __init a2_setup_smp(void)
-{
-	smp_ops = &a2_smp_ops;
-}
diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c
deleted file mode 100644
index 58cd1f0..0000000
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/time.h>
-#include <linux/of_address.h>
-
-#include <asm/scom.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-#define WSP_SOC_COMPATIBLE	"ibm,wsp-soc"
-#define PBIC_COMPATIBLE		"ibm,wsp-pbic"
-#define COPRO_COMPATIBLE	"ibm,wsp-coprocessor"
-
-static int __init wsp_probe_buses(void)
-{
-	static __initdata struct of_device_id bus_ids[] = {
-		/*
-		 * every node in between needs to be here or you won't
-		 * find it
-		 */
-		{ .compatible = WSP_SOC_COMPATIBLE, },
-		{ .compatible = PBIC_COMPATIBLE, },
-		{ .compatible = COPRO_COMPATIBLE, },
-		{},
-	};
-	of_platform_bus_probe(NULL, bus_ids, NULL);
-
-	return 0;
-}
-
-void __init wsp_setup_arch(void)
-{
-	/* init to some ~sane value until calibrate_delay() runs */
-	loops_per_jiffy = 50000000;
-
-	scom_init_wsp();
-
-	/* Setup SMP callback */
-#ifdef CONFIG_SMP
-	a2_setup_smp();
-#endif
-#ifdef CONFIG_PCI
-	wsp_setup_pci();
-#endif
-}
-
-void __init wsp_setup_irq(void)
-{
-	wsp_init_irq();
-	opb_pic_init();
-}
-
-
-int __init wsp_probe_devices(void)
-{
-	struct device_node *np;
-
-	/* Our RTC is a ds1500. It seems to be programmatically compatible
-	 * with the ds1511, for which we have a driver, so let's use that.
-	 */
-	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
-	if (np != NULL) {
-		struct resource res;
-		if (of_address_to_resource(np, 0, &res) == 0)
-			platform_device_register_simple("ds1511", 0, &res, 1);
-	}
-
-	wsp_probe_buses();
-
-	return 0;
-}
-
-void wsp_halt(void)
-{
-	u64 val;
-	scom_map_t m;
-	struct device_node *dn;
-	struct device_node *mine;
-	struct device_node *me;
-	int rc;
-
-	me = of_get_cpu_node(smp_processor_id(), NULL);
-	mine = scom_find_parent(me);
-
-	/* This will halt all the A2s but not power off the chip */
-	for_each_node_with_property(dn, "scom-controller") {
-		if (dn == mine)
-			continue;
-		m = scom_map(dn, 0, 1);
-
-		/* read-modify-write it so the HW probe does not get
-		 * confused */
-		rc = scom_read(m, 0, &val);
-		if (rc == 0)
-			scom_write(m, 0, val | 1);
-		scom_unmap(m);
-	}
-	m = scom_map(mine, 0, 1);
-	rc = scom_read(m, 0, &val);
-	if (rc == 0)
-		scom_write(m, 0, val | 1);
-	/* should never return */
-	scom_unmap(m);
-}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
deleted file mode 100644
index a563a8a..0000000
--- a/arch/powerpc/platforms/wsp/wsp.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __WSP_H
-#define __WSP_H
-
-#include <asm/wsp.h>
-
-/* Devtree compatible strings for major devices */
-#define PCIE_COMPATIBLE     "ibm,wsp-pciex"
-
-extern void wsp_setup_arch(void);
-extern void wsp_setup_irq(void);
-extern int wsp_probe_devices(void);
-extern void wsp_halt(void);
-
-extern void wsp_setup_pci(void);
-extern void scom_init_wsp(void);
-
-extern void a2_setup_smp(void);
-extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
-			       struct device_node *np);
-extern int smp_a2_kick_cpu(int nr);
-
-extern void opb_pic_init(void);
-
-/* chroma-specific management */
-extern void wsp_h8_restart(char *cmd);
-extern void wsp_h8_power_off(void);
-extern void __init wsp_setup_h8(void);
-
-#endif /*  __WSP_H */
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
deleted file mode 100644
index 9a15e5b..0000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ /dev/null
@@ -1,1134 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define DEBUG
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/debugfs.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-#include <asm/iommu.h>
-#include <asm/io-workarounds.h>
-#include <asm/debug.h>
-
-#include "wsp.h"
-#include "wsp_pci.h"
-#include "msi.h"
-
-
-/* Max number of TVTs for one table. Only 32-bit tables can use
- * multiple TVTs, so the max currently supported is 8, since only
- * 2G of DMA space is supported.
- */
-#define MAX_TABLE_TVT_COUNT		8
-
-struct wsp_dma_table {
-	struct list_head	link;
-	struct iommu_table	table;
-	struct wsp_phb	*phb;
-	struct page		*tces[MAX_TABLE_TVT_COUNT];
-};
-
-/* We support DMA regions from 0...2G in 32bit space (no support for
- * 64-bit DMA just yet). Each device gets a separate TCE table (TVT
- * entry) with validation enabled (though not supported by SimiCS
- * just yet).
- *
- * To simplify things, we divide this 2G space into N regions based
- * on the constant below, which could be turned into a tunable eventually.
- *
- * We then assign dynamically those regions to devices as they show up.
- *
- * We use a bitmap as an allocator for these.
- *
- * Tables are allocated/created dynamically as devices are discovered,
- * multiple TVT entries are used if needed
- *
- * When 64-bit DMA support is added we should simply use a separate set
- * of larger regions (the HW supports 64 TVT entries). We can
- * additionally create a bypass region in 64-bit space for performance,
- * though that would have a cost in terms of security.
- *
- * If you set NUM_DMA32_REGIONS to 1, then a single table is shared
- * for all devices and bus/dev/fn validation is disabled
- *
- * Note that a DMA32 region cannot be smaller than 256M so the max
- * supported here for now is 8. We don't yet support sharing regions
- * between multiple devices so the max number of devices supported
- * is MAX_TABLE_TVT_COUNT.
- */
-#define NUM_DMA32_REGIONS	1
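-/* Illustrative note (not in the original source): with NUM_DMA32_REGIONS
- * set to 8, each region is 2G / 8 = 256M and region N covers the DMA
- * range [N * 256M, (N + 1) * 256M) above dma32_base. With the default
- * of 1, a single shared 2G region is used and validation is off. */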
-
-struct wsp_phb {
-	struct pci_controller	*hose;
-
-	/* Lock controlling access to the list of dma tables.
-	 * It does -not- protect against dma_* operations on
-	 * those tables; those should be stopped before an entry
-	 * is removed from the list.
-	 *
-	 * The lock is also used for error handling operations
-	 */
-	spinlock_t		lock;
-	struct list_head	dma_tables;
-	unsigned long		dma32_map;
-	unsigned long		dma32_base;
-	unsigned int		dma32_num_regions;
-	unsigned long		dma32_region_size;
-
-	/* Debugfs stuff */
-	struct dentry		*ddir;
-
-	struct list_head	all;
-};
-static LIST_HEAD(wsp_phbs);
-
-//#define cfg_debug(fmt...)	pr_debug(fmt)
-#define cfg_debug(fmt...)
-
-
-static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
-				  int offset, int len, u32 *val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
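-	/* Illustrative example (not in the original source): a 4-byte read
-	 * of offset 0x10 on bus 1, devfn 0x08 builds addr =
-	 * 0x8000000000000000 | (1ull << 52) | (0x08ull << 44) |
-	 * (0x10ull << 32) = 0x8010801000000000 before the byte-enable
-	 * bits are ORed in below. */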
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xff;
-		cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xffff;
-		cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA);
-		cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
-				   int offset, int len, u32 val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops wsp_pcie_pci_ops =
-{
-	.read = wsp_pcie_read_config,
-	.write = wsp_pcie_write_config,
-};
-
-#define TCE_SHIFT		12
-#define TCE_PAGE_SIZE		(1 << TCE_SHIFT)
-#define TCE_PCI_WRITE		0x2		 /* write from PCI allowed */
-#define TCE_PCI_READ		0x1	 	 /* read from PCI allowed */
-#define TCE_RPN_MASK		0x3fffffffffful  /* 42-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT		12
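-/* Illustrative note (not in the original source): a TCE is the 4K real
- * page number shifted back into place plus permission bits, so a
- * read/write mapping of physical page 0x12345000 encodes as
- * 0x0000000012345003. */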
-
-//#define dma_debug(fmt...)	pr_debug(fmt)
-#define dma_debug(fmt...)
-
-static int tce_build_wsp(struct iommu_table *tbl, long index, long npages,
-			   unsigned long uaddr, enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-	u64 proto_tce;
-	u64 *tcep;
-	u64 rpn;
-
-	proto_tce = TCE_PCI_READ;
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	proto_tce |= TCE_PCI_WRITE;
-#else
-	if (direction != DMA_TO_DEVICE)
-		proto_tce |= TCE_PCI_WRITE;
-#endif
-
-	/* XXX Make this faster by factoring out the page address
-	 * within a TCE table
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
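-		/* Illustrative note (not in the original source): each
-		 * tces[] allocation holds 64K entries, so index 0x18003
-		 * lands in tces[1] at entry offset 0x8003. */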
-
-		/* can't move this out since we might cross an LMB boundary */
-		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-
-		dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n",
-			  tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K);
-
-		uaddr += TCE_PAGE_SIZE;
-		index++;
-	}
-	return 0;
-}
-
-static void tce_free_wsp(struct iommu_table *tbl, long index, long npages)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	struct pci_controller *hose = ptbl->phb->hose;
-#endif
-	u64 *tcep;
-
-	/* XXX Make this faster by factoring out the page address
-	 * within a TCE table. Also use the line-kill option to kill
-	 * multiple TCEs at once
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
-		dma_debug("[DMA] TCE %p cleared\n", tcep);
-		*tcep = 0;
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-		/* Don't write there since it would pollute other MMIO accesses */
-		out_be64(hose->cfg_data + PCIE_REG_TCE_KILL,
-			 PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K |
-			 (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK));
-#endif
-		index++;
-	}
-}
-
-static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb,
-							    unsigned int region,
-							    struct pci_dev *validate)
-{
-	struct pci_controller *hose = phb->hose;
-	unsigned long size = phb->dma32_region_size;
-	unsigned long addr = phb->dma32_region_size * region + phb->dma32_base;
-	struct wsp_dma_table *tbl;
-	int tvts_per_table, i, tvt, nid;
-	unsigned long flags;
-
-	nid = of_node_to_nid(phb->hose->dn);
-
-	/* Calculate how many TVTs are needed */
-	tvts_per_table = size / 0x10000000;
-	if (tvts_per_table == 0)
-		tvts_per_table = 1;
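-	/* Illustrative note (not in the original source): a single 2G
-	 * region needs 2G / 256M = 8 TVTs, while 8 regions of 256M each
-	 * need only one TVT per table. */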
-
-	/* Calculate the base TVT index. We know all tables have the same
-	 * size so we just do a simple multiply here
-	 */
-	tvt = region * tvts_per_table;
-
-	pr_debug("         Region : %d\n", region);
-	pr_debug("      DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1);
-	pr_debug(" Number of TVTs : %d\n", tvts_per_table);
-	pr_debug("       Base TVT : %d\n", tvt);
-	pr_debug("         Node   : %d\n", nid);
-
-	tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid);
-	if (!tbl)
-		return ERR_PTR(-ENOMEM);
-	tbl->phb = phb;
-
-	/* Create as many TVTs as needed, each represents 256M at most */
-	for (i = 0; i < tvts_per_table; i++) {
-		u64 tvt_data1, tvt_data0;
-
-		/* Allocate table. We always use a 4K TCE size for now, so
-		 * one table is always 8 * (256M / 4K) == 512K
-		 */
-		tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000));
-		if (tbl->tces[i] == NULL)
-			goto fail;
-		memset(page_address(tbl->tces[i]), 0, 0x80000);
-
-		pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i]));
-
-		/* Table size. We currently set it to be the whole 256M region */
-		tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT;
-		/* IO page size set to 4K */
-		tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT;
-		/* Shift in the address */
-		tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT;
-
-		/* Validation stuff. We only fully validate bus/dev/fn for
-		 * now; one day maybe we can group devices, but that isn't
-		 * the case at the moment
-		 */
-		if (validate) {
-			tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK;
-			tvt_data0 |= validate->bus->number;
-			tvt_data1 |= IODA_TVT1_DEVNUM_VALID;
-			tvt_data1 |= ((u64)PCI_SLOT(validate->devfn))
-				<< IODA_TVT1_DEVNUM_VALUE_SHIFT;
-			tvt_data1 |= IODA_TVT1_FUNCNUM_VALID;
-			tvt_data1 |= ((u64)PCI_FUNC(validate->devfn))
-				<< IODA_TVT1_FUNCNUM_VALUE_SHIFT;
-		}
-
-		/* XX PE number is always 0 for now */
-
-		/* Program the values using the PHB lock */
-		spin_lock_irqsave(&phb->lock, flags);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0);
-		spin_unlock_irqrestore(&phb->lock, flags);
-	}
-
-	/* Init bits and pieces */
-	tbl->table.it_blocksize = 16;
-	tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
-	tbl->table.it_offset = addr >> tbl->table.it_page_shift;
-	tbl->table.it_size = size >> tbl->table.it_page_shift;
-
-	/*
-	 * It's already blank but we clear it anyway.
-	 * Consider an additional interface that makes clearing optional
-	 */
-	iommu_init_table(&tbl->table, nid);
-
-	list_add(&tbl->link, &phb->dma_tables);
-	return tbl;
-
- fail:
-	pr_debug("  Failed to allocate a 256M TCE table!\n");
-	for (i = 0; i < tvts_per_table; i++)
-		if (tbl->tces[i])
-			__free_pages(tbl->tces[i], get_order(0x80000));
-	kfree(tbl);
-	return ERR_PTR(-ENOMEM);
-}
-
-static void wsp_pci_dma_dev_setup(struct pci_dev *pdev)
-{
-	struct dev_archdata *archdata = &pdev->dev.archdata;
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct wsp_phb *phb = hose->private_data;
-	struct wsp_dma_table *table = NULL;
-	unsigned long flags;
-	int i;
-
-	/* Don't assign an iommu table to a bridge */
-	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		return;
-
-	pr_debug("%s: Setting up DMA...\n", pci_name(pdev));
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	/* If only one region, check if it already exist */
-	/* If only one region, check if it already exists */
-		spin_unlock_irqrestore(&phb->lock, flags);
-		if (list_empty(&phb->dma_tables))
-			table = wsp_pci_create_dma32_table(phb, 0, NULL);
-		else
-			table = list_first_entry(&phb->dma_tables,
-						 struct wsp_dma_table,
-						 link);
-	} else {
-		/* else find a free region */
-		for (i = 0; i < phb->dma32_num_regions && !table; i++) {
-			if (__test_and_set_bit(i, &phb->dma32_map))
-				continue;
-			spin_unlock_irqrestore(&phb->lock, flags);
-			table = wsp_pci_create_dma32_table(phb, i, pdev);
-		}
-	}
-
-	/* Check if we got an error */
-	if (IS_ERR(table)) {
-		pr_err("%s: Failed to create DMA table, err %ld!\n",
-		       pci_name(pdev), PTR_ERR(table));
-		return;
-	}
-
-	/* Or a valid table */
-	if (table) {
-		pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n",
-			pci_name(pdev),
-			table->table.it_offset << IOMMU_PAGE_SHIFT_4K,
-			(table->table.it_offset << IOMMU_PAGE_SHIFT_4K)
-			+ phb->dma32_region_size - 1);
-		archdata->dma_data.iommu_table_base = &table->table;
-		return;
-	}
-
-	/* Or no room */
-	spin_unlock_irqrestore(&phb->lock, flags);
-	pr_err("%s: Out of DMA space!\n", pci_name(pdev));
-}
-
-static void __init wsp_pcie_configure_hw(struct pci_controller *hose)
-{
-	u64 val;
-	int i;
-
-#define DUMP_REG(x) \
-	pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x))
-
-	/*
-	 * Some WSP variants have a bogus class code by default in the PCI-E
-	 * root complex's built-in P2P bridge
-	 */
-	val = in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1);
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1,
-		 (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8));
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1));
-
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	/* XXX Disable TCE caching, it doesn't work on DD1 */
-	out_be64(hose->cfg_data + 0xe50,
-		 in_be64(hose->cfg_data + 0xe50) | (3ull << 62));
-	printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50));
-#endif
-
-	/* Configure M32A and IO. IO is hard wired to be 1M for now */
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys);
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK,
-		 (~(hose->io_resource.end - hose->io_resource.start)) &
-		 0x3fffffff000ul);
-	out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1);
-
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR,
-		 hose->mem_resources[0].start);
-	printk("Want to write to M32A_BASE_MASK : 0x%llx\n",
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK,
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR,
-		 (hose->mem_resources[0].start - hose->mem_offset[0]) | 1);
-
-	/* Clear all TVT entries
-	 *
-	 * XX Might get TVT count from device-tree
-	 */
-	for (i = 0; i < IODA_TVT_COUNT; i++) {
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 PCIE_REG_IODA_AD_TBL_TVT | i);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0);
-	}
-
-	/* Kill the TCE cache */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG,
-		 in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) |
-		 PCIE_REG_PHBC_64B_TCE_EN);
-
-	/* Enable 32 & 64-bit MSIs, IO space and M32A */
-	val = PCIE_REG_PHBC_32BIT_MSI_EN |
-	      PCIE_REG_PHBC_IO_EN |
-	      PCIE_REG_PHBC_64BIT_MSI_EN |
-	      PCIE_REG_PHBC_M32A_EN;
-	if (iommu_is_off)
-		val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS;
-	pr_debug("Will write config: 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val);
-
-	/* Enable error reporting */
-	out_be64(hose->cfg_data + 0xe00,
-		 in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull);
-
-	/* Mask an error that's generated when doing a config space probe
-	 *
-	 * XXX Maybe we should only mask it around config space cycles... that or
-	 * ignore it when we know we had a config space cycle recently?
-	 */
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull);
-
-	/* Enable UTL errors; for now, all of them go to UTL irq 1
-	 *
-	 * We similarly mask one UTL error apparently caused during normal
-	 * probing. We also mask the link up error
-	 */
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull);
-
-	DUMP_REG(PCIE_REG_IO_BASE_ADDR);
-	DUMP_REG(PCIE_REG_IO_BASE_MASK);
-	DUMP_REG(PCIE_REG_IO_START_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32A_START_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32B_START_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_MASK);
-	DUMP_REG(PCIE_REG_M64_START_ADDR);
-	DUMP_REG(PCIE_REG_PHB_CONFIG);
-}
-
-static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port)
-{
-	u64 val;
-	int i;
-
-	for (i = 0; i < 10000; i++) {
-		val = in_be64(phb->hose->cfg_data + 0xe08);
-		if ((val & 0x1900000000000000ull) == 0x0100000000000000ull)
-			return;
-		udelay(1);
-	}
-	pr_warning("PCI IO timeout on domain %d port 0x%lx\n",
-		   phb->hose->global_number, port);
-}
-
-#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa)		\
-static ret wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	ret rval;						\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	rval = __do_##name al;					\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-	return rval;						\
-}
-
-#define DEF_PCI_AC_NORET_pio(name, at, al, aa)			\
-static void wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	__do_##name al;						\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-}
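-/* Illustrative note (not in the original source): asm/io-defs.h is
- * expected to instantiate these, e.g. DEF_PCI_AC_RET(inb, u8,
- * (unsigned long port), (port), pio, port), which would expand to a
- * wsp_pci_inb() that takes the PHB lock, waits for the port to go
- * idle, and then issues __do_inb(port). */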
-
-#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa)
-#define DEF_PCI_AC_NORET_mem(name, at, al, aa)
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
-	DEF_PCI_AC_RET_##space(name, ret, at, al, aa)
-
-#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
-	DEF_PCI_AC_NORET_##space(name, at, al, aa)		\
-
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-static struct ppc_pci_io wsp_pci_iops = {
-	.inb = wsp_pci_inb,
-	.inw = wsp_pci_inw,
-	.inl = wsp_pci_inl,
-	.outb = wsp_pci_outb,
-	.outw = wsp_pci_outw,
-	.outl = wsp_pci_outl,
-	.insb = wsp_pci_insb,
-	.insw = wsp_pci_insw,
-	.insl = wsp_pci_insl,
-	.outsb = wsp_pci_outsb,
-	.outsw = wsp_pci_outsw,
-	.outsl = wsp_pci_outsl,
-};
-
-static int __init wsp_setup_one_phb(struct device_node *np)
-{
-	struct pci_controller *hose;
-	struct wsp_phb *phb;
-
-	pr_info("PCI: Setting up PCIe host bridge %s\n", np->full_name);
-
-	phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL);
-	if (!phb)
-		return -ENOMEM;
-	hose = pcibios_alloc_controller(np);
-	if (!hose) {
-		/* Can't really free the phb */
-		return -ENOMEM;
-	}
-	hose->private_data = phb;
-	phb->hose = hose;
-
-	INIT_LIST_HEAD(&phb->dma_tables);
-	spin_lock_init(&phb->lock);
-
-	/* XXX Use bus-range property ? */
-	hose->first_busno = 0;
-	hose->last_busno = 0xff;
-
-	/* We use cfg_data as the address for the whole bridge MMIO space */
-	hose->cfg_data = of_iomap(hose->dn, 0);
-
-	pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data);
-
-	/* Get the ranges of the device-tree */
-	pci_process_bridge_OF_ranges(hose, np, 0);
-
-	/* XXX Force re-assigning of everything for now */
-	pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC |
-		      PCI_ENABLE_PROC_DOMAINS);
-
-	/* Calculate how the TCE space is divided */
-	phb->dma32_base		= 0;
-	phb->dma32_num_regions	= NUM_DMA32_REGIONS;
-	if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) {
-		pr_warning("IOMMU: Clamped to %d DMA32 regions\n",
-			   MAX_TABLE_TVT_COUNT);
-		phb->dma32_num_regions = MAX_TABLE_TVT_COUNT;
-	}
-	phb->dma32_region_size	= 0x80000000 / phb->dma32_num_regions;
-
-	BUG_ON(!is_power_of_2(phb->dma32_region_size));
-
-	/* Setup config ops */
-	hose->ops = &wsp_pcie_pci_ops;
-
-	/* Configure the HW */
-	wsp_pcie_configure_hw(hose);
-
-	/* Instantiate IO workarounds */
-	iowa_register_bus(hose, &wsp_pci_iops, NULL, phb);
-#ifdef CONFIG_PCI_MSI
-	wsp_setup_phb_msi(hose);
-#endif
-
-	/* Add to global list */
-	list_add(&phb->all, &wsp_phbs);
-
-	return 0;
-}
-
-void __init wsp_setup_pci(void)
-{
-	struct device_node *np;
-	int rc;
-
-	/* Find host bridges */
-	for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) {
-		rc = wsp_setup_one_phb(np);
-		if (rc)
-			pr_err("Failed to setup PCIe bridge %s, rc=%d\n",
-			       np->full_name, rc);
-	}
-
-	/* Establish device-tree linkage */
-	pci_devs_phb_init();
-
-	/* Set DMA ops to use TCEs */
-	if (iommu_is_off) {
-		pr_info("PCI-E: Disabled TCEs, using direct DMA\n");
-		set_pci_dma_ops(&dma_direct_ops);
-	} else {
-		ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup;
-		ppc_md.tce_build = tce_build_wsp;
-		ppc_md.tce_free = tce_free_wsp;
-		set_pci_dma_ops(&dma_iommu_ops);
-	}
-}
-
-#define err_debug(fmt...)	pr_debug(fmt)
-//#define err_debug(fmt...)
-
-static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np)
-{
-	const u32 *prop;
-	int hw_irq;
-
-	/* Ok, no interrupts property, let's try to find our child P2P */
-	np = of_get_next_child(np, NULL);
-	if (np == NULL)
-		return 0;
-
-	/* Grab its interrupt map */
-	prop = of_get_property(np, "interrupt-map", NULL);
-	if (prop == NULL)
-		return 0;
-
-	/* Grab one of the interrupts in there, keep the low 4 bits */
-	hw_irq = prop[5] & 0xf;
-
-	/* 0..4 for PHB 0 and 5..9 for PHB 1 */
-	if (hw_irq < 5)
-		hw_irq = 4;
-	else
-		hw_irq = 9;
-	hw_irq |= prop[5] & ~0xf;
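-	/* Illustrative example (not in the original source): if prop[5]
-	 * were 0x23, the low nibble 3 selects PHB 0 and is rounded up to
-	 * 4, then recombined with the high bits to give hw_irq = 0x24. */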
-
-	err_debug("PCI: Using 0x%x as error IRQ for %s\n",
-		  hw_irq, np->parent->full_name);
-	return irq_create_mapping(NULL, hw_irq);
-}
-
-static const struct {
-	u32 offset;
-	const char *name;
-} wsp_pci_regs[] = {
-#define DREG(x) { PCIE_REG_##x, #x }
-#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x }
-	/* Architected registers except CONFIG_ and IODA
-	 * to avoid side effects
-	 */
-	DREG(DMA_CHAN_STATUS),
-	DREG(CPU_LOADSTORE_STATUS),
-	DREG(LOCK0),
-	DREG(LOCK1),
-	DREG(PHB_CONFIG),
-	DREG(IO_BASE_ADDR),
-	DREG(IO_BASE_MASK),
-	DREG(IO_START_ADDR),
-	DREG(M32A_BASE_ADDR),
-	DREG(M32A_BASE_MASK),
-	DREG(M32A_START_ADDR),
-	DREG(M32B_BASE_ADDR),
-	DREG(M32B_BASE_MASK),
-	DREG(M32B_START_ADDR),
-	DREG(M64_BASE_ADDR),
-	DREG(M64_BASE_MASK),
-	DREG(M64_START_ADDR),
-	DREG(TCE_KILL),
-	DREG(LOCK2),
-	DREG(PHB_GEN_CAP),
-	DREG(PHB_TCE_CAP),
-	DREG(PHB_IRQ_CAP),
-	DREG(PHB_EEH_CAP),
-	DREG(PAPR_ERR_INJ_CONTROL),
-	DREG(PAPR_ERR_INJ_ADDR),
-	DREG(PAPR_ERR_INJ_MASK),
-
-	/* UTL core regs */
-	DUTL(SYS_BUS_CONTROL),
-	DUTL(STATUS),
-	DUTL(SYS_BUS_AGENT_STATUS),
-	DUTL(SYS_BUS_AGENT_ERR_SEV),
-	DUTL(SYS_BUS_AGENT_IRQ_EN),
-	DUTL(SYS_BUS_BURST_SZ_CONF),
-	DUTL(REVISION_ID),
-	DUTL(OUT_POST_HDR_BUF_ALLOC),
-	DUTL(OUT_POST_DAT_BUF_ALLOC),
-	DUTL(IN_POST_HDR_BUF_ALLOC),
-	DUTL(IN_POST_DAT_BUF_ALLOC),
-	DUTL(OUT_NP_BUF_ALLOC),
-	DUTL(IN_NP_BUF_ALLOC),
-	DUTL(PCIE_TAGS_ALLOC),
-	DUTL(GBIF_READ_TAGS_ALLOC),
-
-	DUTL(PCIE_PORT_CONTROL),
-	DUTL(PCIE_PORT_STATUS),
-	DUTL(PCIE_PORT_ERROR_SEV),
-	DUTL(PCIE_PORT_IRQ_EN),
-	DUTL(RC_STATUS),
-	DUTL(RC_ERR_SEVERITY),
-	DUTL(RC_IRQ_EN),
-	DUTL(EP_STATUS),
-	DUTL(EP_ERR_SEVERITY),
-	DUTL(EP_ERR_IRQ_EN),
-	DUTL(PCI_PM_CTRL1),
-	DUTL(PCI_PM_CTRL2),
-
-	/* PCIe stack regs */
-	DREG(SYSTEM_CONFIG1),
-	DREG(SYSTEM_CONFIG2),
-	DREG(EP_SYSTEM_CONFIG),
-	DREG(EP_FLR),
-	DREG(EP_BAR_CONFIG),
-	DREG(LINK_CONFIG),
-	DREG(PM_CONFIG),
-	DREG(DLP_CONTROL),
-	DREG(DLP_STATUS),
-	DREG(ERR_REPORT_CONTROL),
-	DREG(SLOT_CONTROL1),
-	DREG(SLOT_CONTROL2),
-	DREG(UTL_CONFIG),
-	DREG(BUFFERS_CONFIG),
-	DREG(ERROR_INJECT),
-	DREG(SRIOV_CONFIG),
-	DREG(PF0_SRIOV_STATUS),
-	DREG(PF1_SRIOV_STATUS),
-	DREG(PORT_NUMBER),
-	DREG(POR_SYSTEM_CONFIG),
-
-	/* Internal logic regs */
-	DREG(PHB_VERSION),
-	DREG(RESET),
-	DREG(PHB_CONTROL),
-	DREG(PHB_TIMEOUT_CONTROL1),
-	DREG(PHB_QUIESCE_DMA),
-	DREG(PHB_DMA_READ_TAG_ACTV),
-	DREG(PHB_TCE_READ_TAG_ACTV),
-
-	/* FIR registers */
-	DREG(LEM_FIR_ACCUM),
-	DREG(LEM_FIR_AND_MASK),
-	DREG(LEM_FIR_OR_MASK),
-	DREG(LEM_ACTION0),
-	DREG(LEM_ACTION1),
-	DREG(LEM_ERROR_MASK),
-	DREG(LEM_ERROR_AND_MASK),
-	DREG(LEM_ERROR_OR_MASK),
-
-	/* Error traps registers */
-	DREG(PHB_ERR_STATUS),
-	DREG(PHB_ERR1_STATUS),
-	DREG(PHB_ERR_INJECT),
-	DREG(PHB_ERR_LEM_ENABLE),
-	DREG(PHB_ERR_IRQ_ENABLE),
-	DREG(PHB_ERR_FREEZE_ENABLE),
-	DREG(PHB_ERR_SIDE_ENABLE),
-	DREG(PHB_ERR_LOG_0),
-	DREG(PHB_ERR_LOG_1),
-	DREG(PHB_ERR_STATUS_MASK),
-	DREG(PHB_ERR1_STATUS_MASK),
-	DREG(MMIO_ERR_STATUS),
-	DREG(MMIO_ERR1_STATUS),
-	DREG(MMIO_ERR_INJECT),
-	DREG(MMIO_ERR_LEM_ENABLE),
-	DREG(MMIO_ERR_IRQ_ENABLE),
-	DREG(MMIO_ERR_FREEZE_ENABLE),
-	DREG(MMIO_ERR_SIDE_ENABLE),
-	DREG(MMIO_ERR_LOG_0),
-	DREG(MMIO_ERR_LOG_1),
-	DREG(MMIO_ERR_STATUS_MASK),
-	DREG(MMIO_ERR1_STATUS_MASK),
-	DREG(DMA_ERR_STATUS),
-	DREG(DMA_ERR1_STATUS),
-	DREG(DMA_ERR_INJECT),
-	DREG(DMA_ERR_LEM_ENABLE),
-	DREG(DMA_ERR_IRQ_ENABLE),
-	DREG(DMA_ERR_FREEZE_ENABLE),
-	DREG(DMA_ERR_SIDE_ENABLE),
-	DREG(DMA_ERR_LOG_0),
-	DREG(DMA_ERR_LOG_1),
-	DREG(DMA_ERR_STATUS_MASK),
-	DREG(DMA_ERR1_STATUS_MASK),
-
-	/* Debug and Trace registers */
-	DREG(PHB_DEBUG_CONTROL0),
-	DREG(PHB_DEBUG_STATUS0),
-	DREG(PHB_DEBUG_CONTROL1),
-	DREG(PHB_DEBUG_STATUS1),
-	DREG(PHB_DEBUG_CONTROL2),
-	DREG(PHB_DEBUG_STATUS2),
-	DREG(PHB_DEBUG_CONTROL3),
-	DREG(PHB_DEBUG_STATUS3),
-	DREG(PHB_DEBUG_CONTROL4),
-	DREG(PHB_DEBUG_STATUS4),
-	DREG(PHB_DEBUG_CONTROL5),
-	DREG(PHB_DEBUG_STATUS5),
-
-	/* Don't seem to exist ...
-	DREG(PHB_DEBUG_CONTROL6),
-	DREG(PHB_DEBUG_STATUS6),
-	*/
-};
-
-static int wsp_pci_regs_show(struct seq_file *m, void *private)
-{
-	struct wsp_phb *phb = m->private;
-	struct pci_controller *hose = phb->hose;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-		/* Skip write-only regs */
-		if (wsp_pci_regs[i].offset == 0xc08 ||
-		    wsp_pci_regs[i].offset == 0xc10 ||
-		    wsp_pci_regs[i].offset == 0xc38 ||
-		    wsp_pci_regs[i].offset == 0xc40)
-			continue;
-		seq_printf(m, "0x%03x: 0x%016llx %s\n",
-			   wsp_pci_regs[i].offset,
-			   in_be64(hose->cfg_data + wsp_pci_regs[i].offset),
-			   wsp_pci_regs[i].name);
-	}
-	return 0;
-}
-
-static int wsp_pci_regs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wsp_pci_regs_show, inode->i_private);
-}
-
-static const struct file_operations wsp_pci_regs_fops = {
-	.open = wsp_pci_regs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int wsp_pci_reg_set(void *data, u64 val)
-{
-	out_be64((void __iomem *)data, val);
-	return 0;
-}
-
-static int wsp_pci_reg_get(void *data, u64 *val)
-{
-	*val = in_be64((void __iomem *)data);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n");
-
-static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id)
-{
-	struct wsp_phb *phb = dev_id;
-	struct pci_controller *hose = phb->hose;
-	irqreturn_t handled = IRQ_NONE;
-	struct wsp_pcie_err_log_data ed;
-
-	pr_err("PCI: Error interrupt on %s (PHB %d)\n",
-	       hose->dn->full_name, hose->global_number);
- again:
-	memset(&ed, 0, sizeof(ed));
-
-	/* Read and clear UTL errors */
-	ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS);
-	if (ed.utl_sys_err)
-		out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err);
-	ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS);
-	if (ed.utl_port_err)
-		out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err);
-	ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS);
-	if (ed.utl_rc_err)
-		out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err);
-
-	/* Read and clear main trap errors */
-	ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS);
-	if (ed.phb_err) {
-		ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS);
-		ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0);
-		ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0);
-	}
-	ed.mmio_err = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS);
-	if (ed.mmio_err) {
-		ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS);
-		ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0);
-		ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0);
-	}
-	ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS);
-	if (ed.dma_err) {
-		ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS);
-		ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0);
-		ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0);
-	}
-
-	/* Now print things out */
-	if (ed.phb_err) {
-		pr_err("   PHB Error Status      : 0x%016llx\n", ed.phb_err);
-		pr_err("   PHB First Error Status: 0x%016llx\n", ed.phb_err1);
-		pr_err("   PHB Error Log 0       : 0x%016llx\n", ed.phb_log0);
-		pr_err("   PHB Error Log 1       : 0x%016llx\n", ed.phb_log1);
-	}
-	if (ed.mmio_err) {
-		pr_err("  MMIO Error Status      : 0x%016llx\n", ed.mmio_err);
-		pr_err("  MMIO First Error Status: 0x%016llx\n", ed.mmio_err1);
-		pr_err("  MMIO Error Log 0       : 0x%016llx\n", ed.mmio_log0);
-		pr_err("  MMIO Error Log 1       : 0x%016llx\n", ed.mmio_log1);
-	}
-	if (ed.dma_err) {
-		pr_err("   DMA Error Status      : 0x%016llx\n", ed.dma_err);
-		pr_err("   DMA First Error Status: 0x%016llx\n", ed.dma_err1);
-		pr_err("   DMA Error Log 0       : 0x%016llx\n", ed.dma_log0);
-		pr_err("   DMA Error Log 1       : 0x%016llx\n", ed.dma_log1);
-	}
-	if (ed.utl_sys_err)
-		pr_err("   UTL Sys Error Status  : 0x%016llx\n", ed.utl_sys_err);
-	if (ed.utl_port_err)
-		pr_err("   UTL Port Error Status : 0x%016llx\n", ed.utl_port_err);
-	if (ed.utl_rc_err)
-		pr_err("   UTL RC Error Status   : 0x%016llx\n", ed.utl_rc_err);
-
-	/* Interrupts are caused by the error traps. If we saw any error there,
-	 * we loop again in case the UTL buffered some new errors between
-	 * our reading it and our reading the traps.
-	 */
-	if (ed.dma_err || ed.mmio_err || ed.phb_err) {
-		handled = IRQ_HANDLED;
-		goto again;
-	}
-	return handled;
-}
-
-static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb)
-{
-	struct pci_controller *hose = phb->hose;
-	int err_irq, i, rc;
-	char fname[16];
-
-	/* Create a debugfs file for that PHB */
-	sprintf(fname, "phb%d", phb->hose->global_number);
-	phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root);
-
-	/* Some useful debug output */
-	if (phb->ddir) {
-		struct dentry *d = debugfs_create_dir("regs", phb->ddir);
-		char tmp[64];
-
-		for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-			sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset,
-				wsp_pci_regs[i].name);
-			debugfs_create_file(tmp, 0600, d,
-					    hose->cfg_data + wsp_pci_regs[i].offset,
-					    &wsp_pci_reg_fops);
-		}
-		debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops);
-	}
-
-	/* Find the IRQ number for that PHB */
-	err_irq = irq_of_parse_and_map(hose->dn, 0);
-	if (err_irq == 0)
-		/* XXX Error IRQ lacking from device-tree */
-		err_irq = wsp_pci_get_err_irq_no_dt(hose->dn);
-	if (err_irq == 0) {
-		pr_err("PCI: Failed to fetch error interrupt for %s\n",
-		       hose->dn->full_name);
-		return;
-	}
-	/* Request it */
-	rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb);
-	if (rc) {
-		pr_err("PCI: Failed to request interrupt for %s\n",
-		       hose->dn->full_name);
-	}
-	/* Enable interrupts for all errors for now */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-}
-
-/*
- * This is called later to hookup with the error interrupt
- */
-static int __init wsp_setup_pci_late(void)
-{
-	struct wsp_phb *phb;
-
-	list_for_each_entry(phb, &wsp_phbs, all)
-		wsp_setup_pci_err_reporting(phb);
-
-	return 0;
-}
-arch_initcall(wsp_setup_pci_late);
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h
deleted file mode 100644
index 52e9bd9..0000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_PCI_H
-#define __WSP_PCI_H
-
-/* Architected registers */
-#define PCIE_REG_DMA_CHAN_STATUS	0x110
-#define PCIE_REG_CPU_LOADSTORE_STATUS	0x120
-
-#define PCIE_REG_CONFIG_DATA		0x130
-#define PCIE_REG_LOCK0			0x138
-#define PCIE_REG_CONFIG_ADDRESS		0x140
-#define   PCIE_REG_CA_ENABLE			0x8000000000000000ull
-#define	  PCIE_REG_CA_BUS_MASK			0x0ff0000000000000ull
-#define   PCIE_REG_CA_BUS_SHIFT			(20+32)
-#define   PCIE_REG_CA_DEV_MASK			0x000f800000000000ull
-#define   PCIE_REG_CA_DEV_SHIFT			(15+32)
-#define   PCIE_REG_CA_FUNC_MASK			0x0000700000000000ull
-#define   PCIE_REG_CA_FUNC_SHIFT		(12+32)
-#define   PCIE_REG_CA_REG_MASK			0x00000fff00000000ull
-#define   PCIE_REG_CA_REG_SHIFT			( 0+32)
-#define   PCIE_REG_CA_BE_MASK			0x00000000f0000000ull
-#define   PCIE_REG_CA_BE_SHIFT			(   28)
-#define PCIE_REG_LOCK1			0x148
-
-#define PCIE_REG_PHB_CONFIG		0x160
-#define   PCIE_REG_PHBC_64B_TCE_EN		0x2000000000000000ull
-#define   PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN	0x1000000000000000ull
-#define   PCIE_REG_PHBC_32BIT_MSI_EN		0x0080000000000000ull
-#define   PCIE_REG_PHBC_M64_EN			0x0040000000000000ull
-#define   PCIE_REG_PHBC_IO_EN			0x0008000000000000ull
-#define   PCIE_REG_PHBC_64BIT_MSI_EN		0x0002000000000000ull
-#define   PCIE_REG_PHBC_M32A_EN			0x0000800000000000ull
-#define   PCIE_REG_PHBC_M32B_EN			0x0000400000000000ull
-#define   PCIE_REG_PHBC_MSI_PE_VALIDATE		0x0000200000000000ull
-#define   PCIE_REG_PHBC_DMA_XLATE_BYPASS	0x0000100000000000ull
-
-#define PCIE_REG_IO_BASE_ADDR		0x170
-#define PCIE_REG_IO_BASE_MASK		0x178
-#define PCIE_REG_IO_START_ADDR		0x180
-
-#define PCIE_REG_M32A_BASE_ADDR		0x190
-#define PCIE_REG_M32A_BASE_MASK		0x198
-#define PCIE_REG_M32A_START_ADDR	0x1a0
-
-#define PCIE_REG_M32B_BASE_ADDR		0x1b0
-#define PCIE_REG_M32B_BASE_MASK		0x1b8
-#define PCIE_REG_M32B_START_ADDR	0x1c0
-
-#define PCIE_REG_M64_BASE_ADDR		0x1e0
-#define PCIE_REG_M64_BASE_MASK		0x1e8
-#define PCIE_REG_M64_START_ADDR		0x1f0
-
-#define PCIE_REG_TCE_KILL		0x210
-#define   PCIE_REG_TCEKILL_SINGLE	0x8000000000000000ull
-#define   PCIE_REG_TCEKILL_ADDR_MASK	0x000003fffffffff8ull
-#define   PCIE_REG_TCEKILL_PS_4K	0
-#define   PCIE_REG_TCEKILL_PS_64K	1
-#define   PCIE_REG_TCEKILL_PS_16M	2
-#define   PCIE_REG_TCEKILL_PS_16G	3
-
-#define PCIE_REG_IODA_ADDR		0x220
-#define   PCIE_REG_IODA_AD_AUTOINC	0x8000000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_MVT	0x0005000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PELT	0x0006000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTA	0x0007000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTB	0x0008000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TVT	0x0009000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TCE	0x000a000000000000ull
-#define PCIE_REG_IODA_DATA0		0x228
-#define PCIE_REG_IODA_DATA1		0x230
-
-#define PCIE_REG_LOCK2			0x240
-
-#define PCIE_REG_PHB_GEN_CAP		0x250
-#define PCIE_REG_PHB_TCE_CAP		0x258
-#define PCIE_REG_PHB_IRQ_CAP		0x260
-#define PCIE_REG_PHB_EEH_CAP		0x268
-
-#define PCIE_REG_PAPR_ERR_INJ_CONTROL	0x2b0
-#define PCIE_REG_PAPR_ERR_INJ_ADDR	0x2b8
-#define PCIE_REG_PAPR_ERR_INJ_MASK	0x2c0
-
-
-#define PCIE_REG_SYS_CFG1		0x600
-#define   PCIE_REG_SYS_CFG1_CLASS_CODE	0x0000000000ffffffull
-
-#define IODA_TVT0_TTA_MASK		0x000fffffffff0000ull
-#define IODA_TVT0_TTA_SHIFT		4
-#define IODA_TVT0_BUSNUM_VALID_MASK	0x000000000000e000ull
-#define IODA_TVT0_TCE_TABLE_SIZE_MASK	0x0000000000001f00ull
-#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT	8
-#define IODA_TVT0_BUSNUM_VALUE_MASK	0x00000000000000ffull
-#define IODA_TVT0_BUSNUM_VALID_SHIFT	0
-#define IODA_TVT1_DEVNUM_VALID		0x2000000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_MASK	0x1f00000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_SHIFT	56
-#define IODA_TVT1_FUNCNUM_VALID		0x0008000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_MASK	0x0007000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_SHIFT	48
-#define IODA_TVT1_IO_PAGE_SIZE_MASK	0x00001f0000000000ull
-#define IODA_TVT1_IO_PAGE_SIZE_SHIFT	40
-#define IODA_TVT1_PE_NUMBER_MASK	0x000000000000003full
-#define IODA_TVT1_PE_NUMBER_SHIFT	0
-
-#define IODA_TVT_COUNT			64
-
-/* UTL Core registers */
-#define PCIE_UTL_SYS_BUS_CONTROL	0x400
-#define PCIE_UTL_STATUS			0x408
-#define PCIE_UTL_SYS_BUS_AGENT_STATUS	0x410
-#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV	0x418
-#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN	0x420
-#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF	0x440
-#define PCIE_UTL_REVISION_ID		0x448
-
-#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC	0x4c0
-#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC	0x4d0
-#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC	0x4e0
-#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC	0x4f0
-#define PCIE_UTL_OUT_NP_BUF_ALLOC	0x500
-#define PCIE_UTL_IN_NP_BUF_ALLOC	0x510
-#define PCIE_UTL_PCIE_TAGS_ALLOC	0x520
-#define PCIE_UTL_GBIF_READ_TAGS_ALLOC	0x530
-
-#define PCIE_UTL_PCIE_PORT_CONTROL	0x540
-#define PCIE_UTL_PCIE_PORT_STATUS	0x548
-#define PCIE_UTL_PCIE_PORT_ERROR_SEV	0x550
-#define PCIE_UTL_PCIE_PORT_IRQ_EN	0x558
-#define PCIE_UTL_RC_STATUS		0x560
-#define PCIE_UTL_RC_ERR_SEVERITY	0x568
-#define PCIE_UTL_RC_IRQ_EN		0x570
-#define PCIE_UTL_EP_STATUS		0x578
-#define PCIE_UTL_EP_ERR_SEVERITY	0x580
-#define PCIE_UTL_EP_ERR_IRQ_EN		0x588
-
-#define PCIE_UTL_PCI_PM_CTRL1		0x590
-#define PCIE_UTL_PCI_PM_CTRL2		0x598
-
-/* PCIe stack registers */
-#define PCIE_REG_SYSTEM_CONFIG1		0x600
-#define PCIE_REG_SYSTEM_CONFIG2		0x608
-#define PCIE_REG_EP_SYSTEM_CONFIG	0x618
-#define PCIE_REG_EP_FLR			0x620
-#define PCIE_REG_EP_BAR_CONFIG		0x628
-#define PCIE_REG_LINK_CONFIG		0x630
-#define PCIE_REG_PM_CONFIG		0x640
-#define PCIE_REG_DLP_CONTROL		0x650
-#define PCIE_REG_DLP_STATUS		0x658
-#define PCIE_REG_ERR_REPORT_CONTROL	0x660
-#define PCIE_REG_SLOT_CONTROL1		0x670
-#define PCIE_REG_SLOT_CONTROL2		0x678
-#define PCIE_REG_UTL_CONFIG		0x680
-#define PCIE_REG_BUFFERS_CONFIG		0x690
-#define PCIE_REG_ERROR_INJECT		0x698
-#define PCIE_REG_SRIOV_CONFIG		0x6a0
-#define PCIE_REG_PF0_SRIOV_STATUS	0x6a8
-#define PCIE_REG_PF1_SRIOV_STATUS	0x6b0
-#define PCIE_REG_PORT_NUMBER		0x700
-#define PCIE_REG_POR_SYSTEM_CONFIG	0x708
-
-/* PHB internal logic registers */
-#define PCIE_REG_PHB_VERSION		0x800
-#define PCIE_REG_RESET			0x808
-#define PCIE_REG_PHB_CONTROL		0x810
-#define PCIE_REG_PHB_TIMEOUT_CONTROL1	0x878
-#define PCIE_REG_PHB_QUIESCE_DMA	0x888
-#define PCIE_REG_PHB_DMA_READ_TAG_ACTV	0x900
-#define PCIE_REG_PHB_TCE_READ_TAG_ACTV	0x908
-
-/* FIR registers */
-#define PCIE_REG_LEM_FIR_ACCUM		0xc00
-#define PCIE_REG_LEM_FIR_AND_MASK	0xc08
-#define PCIE_REG_LEM_FIR_OR_MASK	0xc10
-#define PCIE_REG_LEM_ACTION0		0xc18
-#define PCIE_REG_LEM_ACTION1		0xc20
-#define PCIE_REG_LEM_ERROR_MASK		0xc30
-#define PCIE_REG_LEM_ERROR_AND_MASK	0xc38
-#define PCIE_REG_LEM_ERROR_OR_MASK	0xc40
-
-/* PHB Error registers */
-#define PCIE_REG_PHB_ERR_STATUS		0xc80
-#define PCIE_REG_PHB_ERR1_STATUS	0xc88
-#define PCIE_REG_PHB_ERR_INJECT		0xc90
-#define PCIE_REG_PHB_ERR_LEM_ENABLE	0xc98
-#define PCIE_REG_PHB_ERR_IRQ_ENABLE	0xca0
-#define PCIE_REG_PHB_ERR_FREEZE_ENABLE	0xca8
-#define PCIE_REG_PHB_ERR_SIDE_ENABLE	0xcb8
-#define PCIE_REG_PHB_ERR_LOG_0		0xcc0
-#define PCIE_REG_PHB_ERR_LOG_1		0xcc8
-#define PCIE_REG_PHB_ERR_STATUS_MASK	0xcd0
-#define PCIE_REG_PHB_ERR1_STATUS_MASK	0xcd8
-
-#define PCIE_REG_MMIO_ERR_STATUS	0xd00
-#define PCIE_REG_MMIO_ERR1_STATUS	0xd08
-#define PCIE_REG_MMIO_ERR_INJECT	0xd10
-#define PCIE_REG_MMIO_ERR_LEM_ENABLE	0xd18
-#define PCIE_REG_MMIO_ERR_IRQ_ENABLE	0xd20
-#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE	0xd28
-#define PCIE_REG_MMIO_ERR_SIDE_ENABLE	0xd38
-#define PCIE_REG_MMIO_ERR_LOG_0		0xd40
-#define PCIE_REG_MMIO_ERR_LOG_1		0xd48
-#define PCIE_REG_MMIO_ERR_STATUS_MASK	0xd50
-#define PCIE_REG_MMIO_ERR1_STATUS_MASK	0xd58
-
-#define PCIE_REG_DMA_ERR_STATUS		0xd80
-#define PCIE_REG_DMA_ERR1_STATUS	0xd88
-#define PCIE_REG_DMA_ERR_INJECT		0xd90
-#define PCIE_REG_DMA_ERR_LEM_ENABLE	0xd98
-#define PCIE_REG_DMA_ERR_IRQ_ENABLE	0xda0
-#define PCIE_REG_DMA_ERR_FREEZE_ENABLE	0xda8
-#define PCIE_REG_DMA_ERR_SIDE_ENABLE	0xdb8
-#define PCIE_REG_DMA_ERR_LOG_0		0xdc0
-#define PCIE_REG_DMA_ERR_LOG_1		0xdc8
-#define PCIE_REG_DMA_ERR_STATUS_MASK	0xdd0
-#define PCIE_REG_DMA_ERR1_STATUS_MASK	0xdd8
-
-/* Shortcuts for access to the above using the PHB definitions
- * with an offset
- */
-#define PCIE_REG_ERR_PHB_OFFSET		0x0
-#define PCIE_REG_ERR_MMIO_OFFSET	0x80
-#define PCIE_REG_ERR_DMA_OFFSET		0x100
-
-/* Debug and Trace registers */
-#define PCIE_REG_PHB_DEBUG_CONTROL0	0xe00
-#define PCIE_REG_PHB_DEBUG_STATUS0	0xe08
-#define PCIE_REG_PHB_DEBUG_CONTROL1	0xe10
-#define PCIE_REG_PHB_DEBUG_STATUS1	0xe18
-#define PCIE_REG_PHB_DEBUG_CONTROL2	0xe20
-#define PCIE_REG_PHB_DEBUG_STATUS2	0xe28
-#define PCIE_REG_PHB_DEBUG_CONTROL3	0xe30
-#define PCIE_REG_PHB_DEBUG_STATUS3	0xe38
-#define PCIE_REG_PHB_DEBUG_CONTROL4	0xe40
-#define PCIE_REG_PHB_DEBUG_STATUS4	0xe48
-#define PCIE_REG_PHB_DEBUG_CONTROL5	0xe50
-#define PCIE_REG_PHB_DEBUG_STATUS5	0xe58
-#define PCIE_REG_PHB_DEBUG_CONTROL6	0xe60
-#define PCIE_REG_PHB_DEBUG_STATUS6	0xe68
-
-/* Definition for PCIe errors */
-struct wsp_pcie_err_log_data {
-	__u64	phb_err;
-	__u64	phb_err1;
-	__u64	phb_log0;
-	__u64	phb_log1;
-	__u64	mmio_err;
-	__u64	mmio_err1;
-	__u64	mmio_log0;
-	__u64	mmio_log1;
-	__u64	dma_err;
-	__u64	dma_err1;
-	__u64	dma_log0;
-	__u64	dma_log1;
-	__u64	utl_sys_err;
-	__u64	utl_port_err;
-	__u64	utl_rc_err;
-	__u64	unused;
-};
-
-#endif /* __WSP_PCI_H */
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 9dee470..de8d948 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -26,6 +26,7 @@
 #include <asm/errno.h>
 #include <asm/xics.h>
 #include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
 
 struct icp_ipl {
 	union {
@@ -145,7 +146,13 @@
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
 	kvmppc_set_host_ipi(cpu, 1);
-	icp_native_set_qirr(cpu, IPI_PRIORITY);
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL) &&
+	    (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id()))))
+		doorbell_cause_ipi(cpu, data);
+	else
+#endif
+		icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
 void xics_wake_cpu(int cpu)
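
For background on the gate in the hunk above: a doorbell IPI can only reach hardware threads that share the sender's core, which is why the fast path is taken only when the target sits in cpu_sibling_mask(). The decision reduces to a standalone predicate (sketch; the helper name is hypothetical):

static bool can_use_doorbell_ipi(int cpu)
{
	/* Doorbells are core-scoped: only kick threads sharing our core;
	 * everything else goes through the slower ICP qirr MMIO write. */
	return cpu_has_feature(CPU_FTR_DBELL) &&
	       cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id()));
}
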
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
index bce3dcf..c987486 100644
--- a/arch/powerpc/xmon/nonstdio.c
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -122,7 +122,7 @@
 
 	if (n && rc == 0) {
 		/* No udbg hooks, fallback to printk() - dangerous */
-		printk(xmon_outbuf);
+		printk("%s", xmon_outbuf);
 	}
 }
 
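
The one-line change above fixes a classic format-string bug: xmon_outbuf holds arbitrary text, and printk(xmon_outbuf) would interpret any '%' in it as a conversion specifier. A minimal userspace illustration of the same hazard (buffer contents are hypothetical):

#include <stdio.h>

int main(void)
{
	char buf[] = "progress: 100%s done\n";	/* user text may contain '%' */

	/* printf(buf);  -- unsafe: the stray %s reads a nonexistent
	 *                  vararg, which is undefined behaviour */
	printf("%s", buf);			/* safe: same fix as above */
	return 0;
}
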
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 36e658a..e4b1a96 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -111,8 +111,7 @@
 KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
 
 archheaders:
-	$(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \
-		ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders
+	$(Q)$(MAKE) KBUILD_SRC= ARCH=$(HEADER_ARCH) archheaders
 
 archprepare: include/generated/user_constants.h
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b660088..fcefdda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,6 +121,7 @@
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
 	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_QUEUE_RWLOCK
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 4582e8e..7730c1c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -57,6 +57,12 @@
 	.long (from) - . ;					\
 	.long (to) - . + 0x7ffffff0 ;				\
 	.popsection
+
+# define _ASM_NOKPROBE(entry)					\
+	.pushsection "_kprobe_blacklist","aw" ;			\
+	_ASM_ALIGN ;						\
+	_ASM_PTR (entry);					\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)					\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
@@ -71,6 +77,7 @@
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - . + 0x7ffffff0\n"			\
 	" .popsection\n"
+/* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 9454c16..53cdfb2 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -116,4 +116,6 @@
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
+extern int kprobe_int3_handler(struct pt_regs *regs);
+extern int kprobe_debug_handler(struct pt_regs *regs);
 #endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
new file mode 100644
index 0000000..70f46f0
--- /dev/null
+++ b/arch/x86/include/asm/qrwlock.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_QRWLOCK_H
+#define _ASM_X86_QRWLOCK_H
+
+#include <asm-generic/qrwlock_types.h>
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+#define queue_write_unlock queue_write_unlock
+static inline void queue_write_unlock(struct qrwlock *lock)
+{
+        barrier();
+        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
+}
+#endif
+
+#include <asm-generic/qrwlock.h>
+
+#endif /* _ASM_X86_QRWLOCK_H */
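
What the queue_write_unlock() override above buys: x86 stores are not reordered against earlier stores, so dropping the write lock needs no atomic read-modify-write; a compiler barrier plus a plain byte store of zero to the writer byte of lock->cnts suffices (except on the OOSTORE/PPRO_FENCE oddballs, which keep the generic version). A hedged sketch of a caller, using the generic API pulled in at the bottom of the header:

static void update_guarded_data(struct qrwlock *lock)
{
	queue_write_lock(lock);		/* generic acquire path */
	/* ... modify data guarded by the lock ... */
	queue_write_unlock(lock);	/* here: barrier() + one byte store */
}
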
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 0f62f54..54f1c80 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -187,6 +187,7 @@
 		cpu_relax();
 }
 
+#ifndef CONFIG_QUEUE_RWLOCK
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -269,6 +270,9 @@
 	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
 		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
+#else
+#include <asm/qrwlock.h>
+#endif /* CONFIG_QUEUE_RWLOCK */
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 4f1bea1..73c4c00 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -34,6 +34,10 @@
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
+#ifdef CONFIG_QUEUE_RWLOCK
+#include <asm-generic/qrwlock_types.h>
+#else
 #include <asm/rwlock.h>
+#endif
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 8ba1884..bc8352e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -68,7 +68,7 @@
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
+asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
@@ -103,7 +103,6 @@
 
 extern int panic_on_unrecovered_nmi;
 
-void math_error(struct pt_regs *, int, int);
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b..74f4c2f 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -41,18 +41,18 @@
 		u8			ixol[MAX_UINSN_BYTES];
 	};
 
-	u16				fixups;
 	const struct uprobe_xol_ops	*ops;
 
 	union {
-#ifdef CONFIG_X86_64
-		unsigned long			rip_rela_target_address;
-#endif
 		struct {
 			s32	offs;
 			u8	ilen;
 			u8	opc1;
-		}				branch;
+		}			branch;
+		struct {
+			u8	fixups;
+			u8	ilen;
+		} 			defparam;
 	};
 };
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df94598..703130f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -5,7 +5,6 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
-#include <linux/kprobes.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
@@ -551,7 +550,7 @@
  *
  * Note: Must be called under text_mutex.
  */
-void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+void *text_poke(void *addr, const void *opcode, size_t len)
 {
 	unsigned long flags;
 	char *vaddr;
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index eab6704..c3fcb5d 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -60,7 +60,7 @@
 	smp_mb__after_atomic();
 }
 
-static int __kprobes
+static int
 arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	int cpu;
@@ -80,6 +80,7 @@
 
 	return NMI_DONE;
 }
+NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
 
 static int __init register_trigger_all_cpu_backtrace(void)
 {
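
The pair of changes above -- drop __kprobes from the definition, add NOKPROBE_SYMBOL() after it -- is the pattern repeated throughout the rest of this series. Instead of relocating the function into the special .kprobes.text section via a function attribute, its address is recorded in the _kprobe_blacklist table. A minimal sketch of the new idiom (handler name is hypothetical):

#include <linux/kprobes.h>
#include <asm/nmi.h>

static int my_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	/* Runs in a context where hitting a kprobe would recurse. */
	return NMI_DONE;
}
NOKPROBE_SYMBOL(my_nmi_handler);	/* blacklisted, but stays in .text */
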
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9d0a979..81e08ef 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2297,7 +2297,7 @@
 	int err;
 
 	if (!config_enabled(CONFIG_SMP))
-		return -1;
+		return -EPERM;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -EINVAL;
@@ -2328,7 +2328,7 @@
 	int ret;
 
 	if (!config_enabled(CONFIG_SMP))
-		return -1;
+		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	ret = __ioapic_set_affinity(data, mask, &dest);
@@ -3001,9 +3001,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	__get_cached_msi_msg(data->msi_desc, &msg);
 
@@ -3100,9 +3102,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest, irq = data->irq;
 	struct msi_msg msg;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	dmar_msi_read(irq, &msg);
 
@@ -3149,9 +3153,11 @@
 	struct irq_cfg *cfg = data->chip_data;
 	struct msi_msg msg;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	hpet_msi_read(data->handler_data, &msg);
 
@@ -3218,9 +3224,11 @@
 {
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest;
+	int ret;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
 
 	target_ht_irq(data->irq, dest, cfg->vector);
 	return IRQ_SET_MASK_OK_NOCOPY;
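
The recurring change in these io_apic.c hunks: the affinity setters used to collapse every __ioapic_set_affinity() failure to -1 (numerically -EPERM), hiding the cause from callers. Propagating the real errno lets a consumer single out vector exhaustion, as the irq.c hunk further down does; a simplified sketch of that consumer pattern (helper name is hypothetical):

static void move_irq_or_warn(struct irq_data *data,
			     const struct cpumask *mask)
{
	int ret = data->chip->irq_set_affinity(data, mask, true);

	if (ret == -ENOSPC)
		pr_crit("IRQ %d: no free interrupt vectors left\n", data->irq);
}
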
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2cbbf88..ef1b93f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/kprobes.h>
 #include <linux/kgdb.h>
 #include <linux/smp.h>
 #include <linux/io.h>
@@ -1193,6 +1194,7 @@
 		(addr <= __get_cpu_var(debug_stack_addr) &&
 		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
+NOKPROBE_SYMBOL(is_debug_stack);
 
 DEFINE_PER_CPU(u32, debug_idt_ctr);
 
@@ -1201,6 +1203,7 @@
 	this_cpu_inc(debug_idt_ctr);
 	load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_set_zero);
 
 void debug_stack_reset(void)
 {
@@ -1209,6 +1212,7 @@
 	if (this_cpu_dec_return(debug_idt_ctr) == 0)
 		load_current_idt();
 }
+NOKPROBE_SYMBOL(debug_stack_reset);
 
 #else	/* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 76f98fe..a450373 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -132,15 +132,6 @@
 		lapic_timer_frequency = hv_lapic_frequency;
 		printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
 				lapic_timer_frequency);
-
-		/*
-		 * On Hyper-V, when we are booting off an EFI firmware stack,
-		 * we do not have many legacy devices including PIC, PIT etc.
-		 */
-		if (efi_enabled(EFI_BOOT)) {
-			printk(KERN_INFO "HyperV: Using null_legacy_pic\n");
-			legacy_pic = &null_legacy_pic;
-		}
 	}
 #endif
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 89f3b7c..2bdfbff 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -303,15 +303,6 @@
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
-	} else {
-		/*
-		 * If we have a PMU initialized but no APIC
-		 * interrupts, we cannot sample hardware
-		 * events (user-space has to fall back and
-		 * sample via a hrtimer based software event):
-		 */
-		if (!x86_pmu.apic)
-			return -EOPNOTSUPP;
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
@@ -1293,7 +1284,7 @@
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-static int __kprobes
+static int
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	u64 start_clock;
@@ -1311,6 +1302,7 @@
 
 	return ret;
 }
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
 
 struct event_constraint emptyconstraint;
 struct event_constraint unconstrained;
@@ -1366,6 +1358,15 @@
 	x86_pmu.apic = 0;
 	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
 	pr_info("no hardware sampling interrupt available.\n");
+
+	/*
+	 * If we have a PMU initialized but no APIC
+	 * interrupts, we cannot sample hardware
+	 * events (user-space has to fall back and
+	 * sample via a hrtimer based software event):
+	 */
+	pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
 }
 
 static struct attribute_group x86_pmu_format_group = {
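
Rather than rejecting each sampling event with -EOPNOTSUPP from the x86 event-init path, the no-APIC case now advertises the limitation once and lets the perf core apply the policy. A hypothetical driver-side sketch of the same idiom:

static void foo_pmu_setup(struct pmu *pmu)
{
	/* This PMU cannot raise a sampling interrupt; the core refuses
	 * sampling events on it instead of each driver doing so. */
	pmu->capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
}
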
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 4c36bbe..cbb1be3e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -593,7 +593,7 @@
 	return 1;
 }
 
-static int __kprobes
+static int
 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
 	int handled = 0;
@@ -606,6 +606,7 @@
 
 	return handled;
 }
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
 
 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d82d155..9dd2459 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -384,6 +384,9 @@
 	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
 		mask |= X86_BR_NO_TX;
 
+	if (br_type & PERF_SAMPLE_BRANCH_COND)
+		mask |= X86_BR_JCC;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -678,6 +681,7 @@
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
 	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -689,6 +693,7 @@
 	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
 					| LBR_FAR,
 	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
 };
 
 /* core */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index d9c12d3..b74ebc7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -200,7 +200,7 @@
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
-unsigned __kprobes long oops_begin(void)
+unsigned long oops_begin(void)
 {
 	int cpu;
 	unsigned long flags;
@@ -223,8 +223,9 @@
 	return flags;
 }
 EXPORT_SYMBOL_GPL(oops_begin);
+NOKPROBE_SYMBOL(oops_begin);
 
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
 	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
@@ -247,8 +248,9 @@
 		panic("Fatal exception");
 	do_exit(signr);
 }
+NOKPROBE_SYMBOL(oops_end);
 
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+int __die(const char *str, struct pt_regs *regs, long err)
 {
 #ifdef CONFIG_X86_32
 	unsigned short ss;
@@ -291,6 +293,7 @@
 #endif
 	return 0;
 }
+NOKPROBE_SYMBOL(__die);
 
 /*
  * This is gone through when something in the kernel has done something bad
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 98313ff..f0da82b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -315,10 +315,6 @@
 ENDPROC(ret_from_kernel_thread)
 
 /*
- * Interrupt exit functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-/*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
  * go as quickly as possible which is why some of this is
@@ -372,10 +368,6 @@
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -495,10 +487,6 @@
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
-/*
- * syscall stub including irq exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -690,10 +678,6 @@
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -784,10 +768,6 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-/*
- *  Irq entries should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -964,10 +944,6 @@
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
-/*
- * End of kprobes section
- */
-	.popsection
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1242,11 +1218,6 @@
 	jmp *%ecx
 #endif
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 #ifdef CONFIG_TRACING
 ENTRY(trace_page_fault)
 	RING0_EC_FRAME
@@ -1460,7 +1431,3 @@
 END(async_page_fault)
 #endif
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 48a2644..b25ca96 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -284,8 +284,6 @@
 	TRACE_IRQS_OFF
 	.endm
 
-/* save complete stack frame */
-	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -314,7 +312,6 @@
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
-	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -772,10 +769,6 @@
 	call \func
 	.endm
 
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	/*
 	 * The interrupt stubs push (~vector+0x80) onto the stack and
 	 * then jump to common_interrupt.
@@ -983,11 +976,6 @@
 #endif
 
 /*
- * End of kprobes section
- */
-       .popsection
-
-/*
  * APIC interrupts.
  */
 .macro apicinterrupt3 num sym do_sym
@@ -1321,11 +1309,6 @@
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
@@ -1742,7 +1725,3 @@
 	CFI_ENDPROC
 END(ignore_sysret)
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a67b47c..5f9cf20 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -32,7 +32,6 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
-#include <linux/kprobes.h>
 #include <linux/percpu.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
@@ -424,7 +423,7 @@
  * NOTIFY_STOP returned for all other cases
  *
  */
-static int __kprobes hw_breakpoint_handler(struct die_args *args)
+static int hw_breakpoint_handler(struct die_args *args)
 {
 	int i, cpu, rc = NOTIFY_STOP;
 	struct perf_event *bp;
@@ -511,7 +510,7 @@
 /*
  * Handle debug exception notifications.
  */
-int __kprobes hw_breakpoint_exceptions_notify(
+int hw_breakpoint_exceptions_notify(
 		struct notifier_block *unused, unsigned long val, void *data)
 {
 	if (val != DIE_DEBUG)
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 2e977b5..8af8171 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -299,13 +299,31 @@
 static void init_8259A(int auto_eoi)
 {
 	unsigned long flags;
+	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
+	unsigned char new_val;
 
 	i8259A_auto_eoi = auto_eoi;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	/*
+	 * Check to see if we have a PIC.
+	 * Mask all except the cascade and read
+	 * back the value we just wrote. If we don't
+	 * have a PIC, we will read 0xff as opposed to the
+	 * value we wrote.
+	 */
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+	outb(probe_val, PIC_MASTER_IMR);
+	new_val = inb(PIC_MASTER_IMR);
+	if (new_val != probe_val) {
+		printk(KERN_INFO "Using NULL legacy PIC\n");
+		legacy_pic = &null_legacy_pic;
+		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+		return;
+	}
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 
 	/*
 	 * outb_pic - this has to work on a wide range of PC hardware.
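
The probe above generalizes what the deleted mshyperv.c hunk earlier special-cased for EFI-booted Hyper-V guests: write a distinctive mask to the master IMR and read it back; an absent PIC leaves the bus floating, so the read returns 0xff. The detection reduces to a predicate (sketch; helper name is hypothetical):

static bool legacy_pic_present(void)
{
	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);	/* 0xfb, != 0xff */

	outb(probe_val, PIC_MASTER_IMR);
	return inb(PIC_MASTER_IMR) == probe_val;	/* 0xff => no PIC */
}
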
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 11ccfb0..922d285 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -365,6 +365,7 @@
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
+	int ret;
 
 	for_each_irq_desc(irq, desc) {
 		int break_affinity = 0;
@@ -403,10 +404,14 @@
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 
-		if (chip->irq_set_affinity)
-			chip->irq_set_affinity(data, affinity, true);
-		else if (!(warned++))
-			set_affinity = 0;
+		if (chip->irq_set_affinity) {
+			ret = chip->irq_set_affinity(data, affinity, true);
+			if (ret == -ENOSPC)
+				pr_crit("IRQ %d set affinity failed because there are no available vectors.  The device assigned to this IRQ is unstable.\n", irq);
+		} else {
+			if (!(warned++))
+				set_affinity = 0;
+		}
 
 		/*
 		 * We unmask if the irq was not marked masked by the
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 61b17dc..7596df6 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -112,7 +112,8 @@
 
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
-static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
+static nokprobe_inline void
+__synthesize_relative_insn(void *from, void *to, u8 op)
 {
 	struct __arch_relative_insn {
 		u8 op;
@@ -125,21 +126,23 @@
 }
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-void __kprobes synthesize_reljump(void *from, void *to)
+void synthesize_reljump(void *from, void *to)
 {
 	__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_reljump);
 
 /* Insert a call instruction at address 'from', which calls address 'to'.*/
-void __kprobes synthesize_relcall(void *from, void *to)
+void synthesize_relcall(void *from, void *to)
 {
 	__synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
 }
+NOKPROBE_SYMBOL(synthesize_relcall);
 
 /*
  * Skip the prefixes of the instruction.
  */
-static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
+static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
 {
 	insn_attr_t attr;
 
@@ -154,12 +157,13 @@
 #endif
 	return insn;
 }
+NOKPROBE_SYMBOL(skip_prefixes);
 
 /*
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
@@ -260,7 +264,7 @@
 }
 
 /* Check if paddr is at an instruction boundary */
-static int __kprobes can_probe(unsigned long paddr)
+static int can_probe(unsigned long paddr)
 {
 	unsigned long addr, __addr, offset = 0;
 	struct insn insn;
@@ -299,7 +303,7 @@
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
-static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
+static int is_IF_modifier(kprobe_opcode_t *insn)
 {
 	/* Skip prefixes */
 	insn = skip_prefixes(insn);
@@ -322,7 +326,7 @@
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-int __kprobes __copy_instruction(u8 *dest, u8 *src)
+int __copy_instruction(u8 *dest, u8 *src)
 {
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -365,7 +369,7 @@
 	return insn.length;
 }
 
-static int __kprobes arch_copy_kprobe(struct kprobe *p)
+static int arch_copy_kprobe(struct kprobe *p)
 {
 	int ret;
 
@@ -392,7 +396,7 @@
 	return 0;
 }
 
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
 {
 	if (alternatives_text_reserved(p->addr, p->addr))
 		return -EINVAL;
@@ -407,17 +411,17 @@
 	return arch_copy_kprobe(p);
 }
 
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 }
 
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
 {
 	text_poke(p->addr, &p->opcode, 1);
 }
 
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
 		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
@@ -425,7 +429,8 @@
 	}
 }
 
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	kcb->prev_kprobe.kp = kprobe_running();
 	kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -433,7 +438,8 @@
 	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 }
 
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -441,8 +447,9 @@
 	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 }
 
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-				struct kprobe_ctlblk *kcb)
+static nokprobe_inline void
+set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+		   struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, p);
 	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
@@ -451,7 +458,7 @@
 		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 }
 
-static void __kprobes clear_btf(void)
+static nokprobe_inline void clear_btf(void)
 {
 	if (test_thread_flag(TIF_BLOCKSTEP)) {
 		unsigned long debugctl = get_debugctlmsr();
@@ -461,7 +468,7 @@
 	}
 }
 
-static void __kprobes restore_btf(void)
+static nokprobe_inline void restore_btf(void)
 {
 	if (test_thread_flag(TIF_BLOCKSTEP)) {
 		unsigned long debugctl = get_debugctlmsr();
@@ -471,8 +478,7 @@
 	}
 }
 
-void __kprobes
-arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	unsigned long *sara = stack_addr(regs);
 
@@ -481,9 +487,10 @@
 	/* Replace the return addr with trampoline addr */
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
-static void __kprobes
-setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb, int reenter)
 {
 	if (setup_detour_execution(p, regs, reenter))
 		return;
@@ -519,22 +526,24 @@
 	else
 		regs->ip = (unsigned long)p->ainsn.insn;
 }
+NOKPROBE_SYMBOL(setup_singlestep);
 
 /*
  * We have reentered the kprobe_handler(), since another probe was hit while
  * within the handler. We save the original kprobes variables and just single
  * step on the instruction of the new probe without calling any user handlers.
  */
-static int __kprobes
-reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+			  struct kprobe_ctlblk *kcb)
 {
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
 	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SS:
 		kprobes_inc_nmissed_count(p);
 		setup_singlestep(p, regs, kcb, 1);
 		break;
-	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
 		/* A probe has been hit in the codepath leading up to, or just
 		 * after, single-stepping of a probed instruction. This entire
 		 * codepath should strictly reside in .kprobes.text section.
@@ -553,12 +562,13 @@
 
 	return 1;
 }
+NOKPROBE_SYMBOL(reenter_kprobe);
 
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int kprobe_int3_handler(struct pt_regs *regs)
 {
 	kprobe_opcode_t *addr;
 	struct kprobe *p;
@@ -621,12 +631,13 @@
 	preempt_enable_no_resched();
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_int3_handler);
 
 /*
 * When a retprobed function returns, this code saves registers and
 * calls trampoline_handler(), which in turn calls the kretprobe's handler.
  */
-static void __used __kprobes kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
 	asm volatile (
 			".global kretprobe_trampoline\n"
@@ -657,11 +668,13 @@
 #endif
 			"	ret\n");
 }
+NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
+NOKPROBE_SYMBOL(kretprobe_trampoline);
 
 /*
  * Called from kretprobe_trampoline
  */
-__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
+__visible __used void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
@@ -747,6 +760,7 @@
 	}
 	return (void *)orig_ret_address;
 }
+NOKPROBE_SYMBOL(trampoline_handler);
 
 /*
  * Called after single-stepping.  p->addr is the address of the
@@ -775,8 +789,8 @@
  * jump instruction after the copied instruction, that jumps to the next
  * instruction after the probepoint.
  */
-static void __kprobes
-resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb)
 {
 	unsigned long *tos = stack_addr(regs);
 	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -851,12 +865,13 @@
 no_change:
 	restore_btf();
 }
+NOKPROBE_SYMBOL(resume_execution);
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
  * remain disabled throughout this function.
  */
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int kprobe_debug_handler(struct pt_regs *regs)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -891,8 +906,9 @@
 
 	return 1;
 }
+NOKPROBE_SYMBOL(kprobe_debug_handler);
 
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -949,12 +965,13 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 /*
  * Wrapper routine for handling exceptions.
  */
-int __kprobes
-kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
 {
 	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
@@ -962,22 +979,7 @@
 	if (args->regs && user_mode_vm(args->regs))
 		return ret;
 
-	switch (val) {
-	case DIE_INT3:
-		if (kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs)) {
-			/*
-			 * Reset the BS bit in dr6 (pointed by args->err) to
-			 * denote completion of processing
-			 */
-			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
-			ret = NOTIFY_STOP;
-		}
-		break;
-	case DIE_GPF:
+	if (val == DIE_GPF) {
 		/*
 		 * To be potentially processing a kprobe fault and to
 		 * trust the result from kprobe_running(), we have
@@ -986,14 +988,12 @@
 		if (!preemptible() && kprobe_running() &&
 		    kprobe_fault_handler(args->regs, args->trapnr))
 			ret = NOTIFY_STOP;
-		break;
-	default:
-		break;
 	}
 	return ret;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr;
@@ -1017,8 +1017,9 @@
 	regs->ip = (unsigned long)(jp->entry);
 	return 1;
 }
+NOKPROBE_SYMBOL(setjmp_pre_handler);
 
-void __kprobes jprobe_return(void)
+void jprobe_return(void)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
@@ -1034,8 +1035,10 @@
 			"       nop			\n"::"b"
 			(kcb->jprobe_saved_sp):"memory");
 }
+NOKPROBE_SYMBOL(jprobe_return);
+NOKPROBE_SYMBOL(jprobe_return_end);
 
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->ip - 1);
@@ -1063,13 +1066,22 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(longjmp_break_handler);
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+	return  (addr >= (unsigned long)__kprobes_text_start &&
+		 addr < (unsigned long)__kprobes_text_end) ||
+		(addr >= (unsigned long)__entry_text_start &&
+		 addr < (unsigned long)__entry_text_end);
+}
 
 int __init arch_init_kprobes(void)
 {
 	return 0;
 }
 
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
 {
 	return 0;
 }
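
arch_within_kprobe_blacklist() above is what lets the explicit .kprobes.text push/pop markers disappear from entry_32.S and entry_64.S earlier in this series: entry code is now excluded by address range rather than by section placement. Roughly how the generic layer consumes it at registration time (simplified sketch; the real check lives in kernel/kprobes.c):

static int check_probe_address(struct kprobe *p)
{
	if (arch_within_kprobe_blacklist((unsigned long)p->addr))
		return -EINVAL;	/* probe would land in entry/kprobes text */
	return 0;
}
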
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 23ef5c5..717b02a 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,8 +25,9 @@
 
 #include "common.h"
 
-static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			     struct kprobe_ctlblk *kcb)
+static nokprobe_inline
+int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+		      struct kprobe_ctlblk *kcb)
 {
 	/*
 	 * Emulate singlestep (and also recover regs->ip)
@@ -41,18 +42,19 @@
 	return 1;
 }
 
-int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			      struct kprobe_ctlblk *kcb)
+int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+		    struct kprobe_ctlblk *kcb)
 {
 	if (kprobe_ftrace(p))
 		return __skip_singlestep(p, regs, kcb);
 	else
 		return 0;
 }
+NOKPROBE_SYMBOL(skip_singlestep);
 
 /* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				     struct ftrace_ops *ops, struct pt_regs *regs)
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct pt_regs *regs)
 {
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
@@ -84,8 +86,9 @@
 end:
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
 	p->ainsn.insn = NULL;
 	p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 898160b..f304773 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -77,7 +77,7 @@
 }
 
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 {
 #ifdef CONFIG_X86_64
 	*addr++ = 0x48;
@@ -138,7 +138,8 @@
 #define INT3_SIZE sizeof(kprobe_opcode_t)
 
 /* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long flags;
@@ -168,8 +169,9 @@
 	}
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(optimized_callback);
 
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+static int copy_optimized_instructions(u8 *dest, u8 *src)
 {
 	int len = 0, ret;
 
@@ -189,7 +191,7 @@
 }
 
 /* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
 	return ((insn->opcode.bytes[0] == 0xff &&
 		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -224,7 +226,7 @@
 }
 
 /* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
+static int can_optimize(unsigned long paddr)
 {
 	unsigned long addr, size = 0, offset = 0;
 	struct insn insn;
@@ -275,7 +277,7 @@
 }
 
 /* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 {
 	int i;
 	struct kprobe *p;
@@ -290,15 +292,15 @@
 }
 
 /* Check the addr is within the optimized instructions. */
-int __kprobes
-arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+				 unsigned long addr)
 {
 	return ((unsigned long)op->kp.addr <= addr &&
 		(unsigned long)op->kp.addr + op->optinsn.size > addr);
 }
 
 /* Free optimized instruction slot */
-static __kprobes
+static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
 	if (op->optinsn.insn) {
@@ -308,7 +310,7 @@
 	}
 }
 
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 {
 	__arch_remove_optimized_kprobe(op, 1);
 }
@@ -318,7 +320,7 @@
  * Target instructions MUST be relocatable (checked inside)
  * This is called when new aggr(opt)probe is allocated or reused.
  */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 {
 	u8 *buf;
 	int ret;
@@ -372,7 +374,7 @@
  * Replace breakpoints (int3) with relative jumps.
  * Caller must call with locking kprobe_mutex and text_mutex.
  */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
 	u8 insn_buf[RELATIVEJUMP_SIZE];
@@ -398,7 +400,7 @@
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	u8 insn_buf[RELATIVEJUMP_SIZE];
 
@@ -424,8 +426,7 @@
 	}
 }
 
-int  __kprobes
-setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
 {
 	struct optimized_kprobe *op;
 
@@ -441,3 +442,4 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(setup_detour_execution);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7e97371..3dd8e2c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -251,8 +251,9 @@
 	return reason;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
+NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	enum ctx_state prev_state;
@@ -276,6 +277,7 @@
 		break;
 	}
 }
+NOKPROBE_SYMBOL(do_async_page_fault);
 
 static void __init paravirt_ops_setup(void)
 {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b4872b9..c3e985d 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -110,7 +110,7 @@
 		a->handler, whole_msecs, decimal_msecs);
 }
 
-static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *a;
@@ -146,6 +146,7 @@
 	/* return total number of NMI events handled */
 	return handled;
 }
+NOKPROBE_SYMBOL(nmi_handle);
 
 int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 {
@@ -208,7 +209,7 @@
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static __kprobes void
+static void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
 	/* check to see if anyone registered against these types of errors */
@@ -238,8 +239,9 @@
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
 	outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(pci_serr_error);
 
-static __kprobes void
+static void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
@@ -269,8 +271,9 @@
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);
 }
+NOKPROBE_SYMBOL(io_check_error);
 
-static __kprobes void
+static void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
 	int handled;
@@ -298,11 +301,12 @@
 
 	pr_emerg("Dazed and confused, but trying to continue\n");
 }
+NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static __kprobes void default_do_nmi(struct pt_regs *regs)
+static void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
@@ -401,6 +405,7 @@
 	else
 		unknown_nmi_error(reason, regs);
 }
+NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
  * NMIs can hit breakpoints which will cause it to lose its
@@ -520,7 +525,7 @@
 }
 #endif
 
-dotraplinkage notrace __kprobes void
+dotraplinkage notrace void
 do_nmi(struct pt_regs *regs, long error_code)
 {
 	nmi_nesting_preprocess(regs);
@@ -537,6 +542,7 @@
 	/* On i386, may loop back to preprocess */
 	nmi_nesting_postprocess();
 }
+NOKPROBE_SYMBOL(do_nmi);
 
 void stop_nmi(void)
 {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b10af8..548d25f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -23,6 +23,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/highmem.h>
+#include <linux/kprobes.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -389,6 +390,11 @@
 	.end_context_switch = paravirt_nop,
 };
 
+/* At this point, native_get/set_debugreg have real function entries */
+NOKPROBE_SYMBOL(native_get_debugreg);
+NOKPROBE_SYMBOL(native_set_debugreg);
+NOKPROBE_SYMBOL(native_load_idt);
+
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 898d077..ca5b02d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@
 	set_thread_flag(TIF_ADDR32);
 
 	/* Mark the associated mm as containing 32-bit tasks. */
-	if (current->mm)
-		current->mm->context.ia32_compat = 1;
-
 	if (x32) {
 		clear_thread_flag(TIF_IA32);
 		set_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_X32;
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* is_compat_task() uses the presence of the x32
 		   syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
+		if (current->mm)
+			current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
 		current_thread_info()->status |= TS_COMPAT;
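
Since ia32_compat now stores the TIF value rather than a bare 1, callers can distinguish which compat ABI an mm contains, not merely that it contains one. A hypothetical predicate built on the new convention:

static bool mm_contains_x32(struct mm_struct *mm)
{
	return mm && mm->context.ia32_compat == TIF_X32;
}
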
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f73b5d4..c6eb418 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -106,7 +107,7 @@
 	preempt_count_dec();
 }
 
-static int __kprobes
+static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
 {
@@ -136,7 +137,38 @@
 	return -1;
 }
 
-static void __kprobes
+static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
+				siginfo_t *info)
+{
+	unsigned long siaddr;
+	int sicode;
+
+	switch (trapnr) {
+	default:
+		return SEND_SIG_PRIV;
+
+	case X86_TRAP_DE:
+		sicode = FPE_INTDIV;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_UD:
+		sicode = ILL_ILLOPN;
+		siaddr = uprobe_get_trap_addr(regs);
+		break;
+	case X86_TRAP_AC:
+		sicode = BUS_ADRALN;
+		siaddr = 0;
+		break;
+	}
+
+	info->si_signo = signr;
+	info->si_errno = 0;
+	info->si_code = sicode;
+	info->si_addr = (void __user *)siaddr;
+	return info;
+}
+
+static void
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
 {
@@ -168,60 +200,43 @@
 	}
 #endif
 
-	if (info)
-		force_sig_info(signr, info, tsk);
-	else
-		force_sig(signr, tsk);
+	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
+			  unsigned long trapnr, int signr)
+{
+	enum ctx_state prev_state = exception_enter();
+	siginfo_t info;
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		conditional_sti(regs);
+		do_trap(trapnr, signr, str, regs, error_code,
+			fill_trap_info(regs, signr, trapnr, &info));
+	}
+
+	exception_exit(prev_state);
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	enum ctx_state prev_state;					\
-									\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
-	exception_exit(prev_state);					\
+	do_error_trap(regs, error_code, str, trapnr, signr);		\
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
-{									\
-	siginfo_t info;							\
-	enum ctx_state prev_state;					\
-									\
-	info.si_signo = signr;						\
-	info.si_errno = 0;						\
-	info.si_code = sicode;						\
-	info.si_addr = (void __user *)siaddr;				\
-	prev_state = exception_enter();					\
-	if (notify_die(DIE_TRAP, str, regs, error_code,			\
-			trapnr, signr) == NOTIFY_STOP) {		\
-		exception_exit(prev_state);				\
-		return;							\
-	}								\
-	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, &info);		\
-	exception_exit(prev_state);					\
-}
-
-DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
-DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow					  )
-DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds						  )
-DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,		     ILL_ILLOPN, regs->ip )
-DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun			  )
-DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS					  )
-DO_ERROR     (X86_TRAP_NP,     SIGBUS,  "segment not present",		segment_not_present				  )
+DO_ERROR(X86_TRAP_DE,     SIGFPE,  "divide error",		divide_error)
+DO_ERROR(X86_TRAP_OF,     SIGSEGV, "overflow",			overflow)
+DO_ERROR(X86_TRAP_BR,     SIGSEGV, "bounds",			bounds)
+DO_ERROR(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op)
+DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
+DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
+DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
 #ifdef CONFIG_X86_32
-DO_ERROR     (X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment					  )
+DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 #endif
-DO_ERROR_INFO(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check,	     BUS_ADRALN, 0	  )
+DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
@@ -263,7 +278,7 @@
 }
 #endif
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
@@ -305,13 +320,14 @@
 		pr_cont("\n");
 	}
 
-	force_sig(SIGSEGV, tsk);
+	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_general_protection);
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 	enum ctx_state prev_state;
 
@@ -327,13 +343,18 @@
 	if (poke_int3_handler(regs))
 		return;
 
-	prev_state = exception_enter();
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
+#ifdef CONFIG_KPROBES
+	if (kprobe_int3_handler(regs))
+		return;
+#endif
+	prev_state = exception_enter();
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
 		goto exit;
@@ -350,6 +371,7 @@
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
@@ -357,7 +379,7 @@
  * for scheduling or signal handling. The actual stack switch is done in
  * entry.S
  */
-asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
 	/* Did already sync */
@@ -376,6 +398,7 @@
 		*regs = *eregs;
 	return regs;
 }
+NOKPROBE_SYMBOL(sync_regs);
 #endif
 
 /*
@@ -402,7 +425,7 @@
  *
  * May run on IST stack.
  */
-dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
+dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
 	enum ctx_state prev_state;
@@ -410,8 +433,6 @@
 	unsigned long dr6;
 	int si_code;
 
-	prev_state = exception_enter();
-
 	get_debugreg(dr6, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
@@ -440,6 +461,12 @@
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
+#ifdef CONFIG_KPROBES
+	if (kprobe_debug_handler(regs))
+		goto exit;
+#endif
+	prev_state = exception_enter();
+
 	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
 							SIGTRAP) == NOTIFY_STOP)
 		goto exit;
@@ -482,13 +509,14 @@
 exit:
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_debug);
 
 /*
  * Note that we play around with the 'TS' bit in an attempt to get
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(struct pt_regs *regs, int error_code, int trapnr)
+static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 {
 	struct task_struct *task = current;
 	siginfo_t info;
@@ -518,7 +546,7 @@
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
@@ -645,7 +673,7 @@
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
 		drop_init_fpu(tsk);
-		force_sig(SIGSEGV, tsk);
+		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 		return;
 	}
 
@@ -653,7 +681,7 @@
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
-dotraplinkage void __kprobes
+dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	enum ctx_state prev_state;
@@ -679,6 +707,7 @@
 #endif
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_device_not_available);
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
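
The DO_ERROR() rework above collapses the old DO_ERROR/DO_ERROR_INFO
pair into one macro: siginfo construction moves into fill_trap_info(),
and every handler becomes a thin wrapper around do_error_trap(). As a
sketch, DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
now expands to roughly:

	dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code)
	{
		/* do_error_trap() handles exception_enter()/exit(),
		 * notify_die() and the fill_trap_info() lookup. */
		do_error_trap(regs, error_code, "divide error",
			      X86_TRAP_DE, SIGFPE);
	}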
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace2291..5d1cbfe 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,20 +32,20 @@
 
 /* Post-execution fixups. */
 
-/* No fixup needed */
-#define UPROBE_FIX_NONE		0x0
-
 /* Adjust IP back to vicinity of actual insn */
-#define UPROBE_FIX_IP		0x1
+#define UPROBE_FIX_IP		0x01
 
 /* Adjust the return address of a call insn */
-#define UPROBE_FIX_CALL	0x2
+#define UPROBE_FIX_CALL		0x02
 
 /* Instruction will modify TF, don't change it */
-#define UPROBE_FIX_SETF	0x4
+#define UPROBE_FIX_SETF		0x04
 
-#define UPROBE_FIX_RIP_AX	0x8000
-#define UPROBE_FIX_RIP_CX	0x4000
+#define UPROBE_FIX_RIP_SI	0x08
+#define UPROBE_FIX_RIP_DI	0x10
+#define UPROBE_FIX_RIP_BX	0x20
+#define UPROBE_FIX_RIP_MASK	\
+	(UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
 
 #define	UPROBE_TRAP_NR		UINT_MAX
 
@@ -67,6 +67,7 @@
  * to keep gcc from statically optimizing it out, as variable_test_bit makes
  * some versions of gcc think only *(unsigned long*) is used.
  */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static volatile u32 good_insns_32[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 	/*      ----------------------------------------------         */
@@ -89,6 +90,37 @@
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
+#else
+#define good_insns_32	NULL
+#endif
+
+/* Good-instruction tables for 64-bit apps */
+#if defined(CONFIG_X86_64)
+static volatile u32 good_insns_64[256 / 32] = {
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+	/*      ----------------------------------------------         */
+	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
+	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
+	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
+	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+	/*      ----------------------------------------------         */
+	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
+};
+#else
+#define good_insns_64	NULL
+#endif
 
 /* Using this for both 64-bit and 32-bit apps */
 static volatile u32 good_2byte_insns[256 / 32] = {
@@ -113,32 +145,6 @@
 	/*      ----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
 };
-
-#ifdef CONFIG_X86_64
-/* Good-instruction tables for 64-bit apps */
-static volatile u32 good_insns_64[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-	/*      ----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
-	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
-	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
-	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
-	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
-	/*      ----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
-};
-#endif
 #undef W
 
 /*
@@ -209,16 +215,25 @@
 	return false;
 }
 
-static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
-	insn_init(insn, auprobe->insn, false);
+	u32 volatile *good_insns;
 
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
+	insn_init(insn, auprobe->insn, x86_64);
+	/* has the side-effect of processing the entire instruction */
+	insn_get_length(insn);
+	if (WARN_ON_ONCE(!insn_complete(insn)))
+		return -ENOEXEC;
+
 	if (is_prefix_bad(insn))
 		return -ENOTSUPP;
 
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+	if (x86_64)
+		good_insns = good_insns_64;
+	else
+		good_insns = good_insns_32;
+
+	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
 		return 0;
 
 	if (insn->opcode.nbytes == 2) {
@@ -230,14 +245,18 @@
 }
 
 #ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+	return	!config_enabled(CONFIG_IA32_EMULATION) ||
+		!(mm->context.ia32_compat == TIF_IA32);
+}
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
  * its memory operand indirectly through a scratch register.  Set
- * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
- * accordingly.  (The contents of the scratch register will be saved
- * before we single-step the modified instruction, and restored
- * afterward.)
+ * defparam->fixups accordingly. (The contents of the scratch register
+ * will be saved before we single-step the modified instruction,
+ * and restored afterward).
  *
  * We do this because a rip-relative instruction can access only a
  * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@
  *
  * Some useful facts about rip-relative instructions:
  *
- *  - There's always a modrm byte.
+ *  - There's always a modrm byte with bit layout "00 reg 101".
  *  - There's never a SIB byte.
  *  - The displacement is always 4 bytes.
+ *  - REX.B=1 bit in REX prefix, which normally extends r/m field,
+ *    has no effect on rip-relative mode. It doesn't make modrm byte
+ *    with r/m=101 refer to register 1101 = R13.
  */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
+	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 *   (evex will need setting of both b and x since
+		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
 	}
 
 	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 *   Encoding: 0f c7/1 modrm
+	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 *   Example where none of bx,cx,dx can be used as scratch reg:
+	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 *   Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 *   and that it can have only register operands, not mem
+	 *   (its modrm byte must have mode=11).
+	 *   If these restrictions are ever lifted,
+	 *   we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know of any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
+
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EVEX vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
+	 */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
+	}
+	/*
 	 * Point cursor at the modrm byte.  The next 4 bytes are the
 	 * displacement.  Beyond the displacement, for some instructions,
 	 * is the immediate operand.
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
-	insn_get_length(insn);
-
 	/*
-	 * Convert from rip-relative addressing to indirect addressing
-	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
-	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8.  In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->fixups = UPROBE_FIX_RIP_CX;
-		/* Change modrm from 00 000 101 to 00 000 001. */
-		*cursor = 0x1;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->fixups = UPROBE_FIX_RIP_AX;
-		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
-		*cursor = (reg << 3);
-	}
+	*cursor = 0x80 | (reg << 3) | reg2;
+}
 
-	/* Target address = address of next instruction + (signed) offset */
-	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
-
-	/* Displacement field is gone; slide immediate field (if any) over. */
-	if (insn->immediate.nbytes) {
-		cursor++;
-		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-	}
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
  * If we're emulating a rip-relative instruction, save the contents
  * of the scratch register and store the target address in that register.
  */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-		autask->saved_scratch_register = regs->ax;
-		regs->ax = current->utask->vaddr;
-		regs->ax += auprobe->rip_rela_target_address;
-	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-		autask->saved_scratch_register = regs->cx;
-		regs->cx = current->utask->vaddr;
-		regs->cx += auprobe->rip_rela_target_address;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
+
+		utask->autask.saved_scratch_register = *sr;
+		*sr = utask->vaddr + auprobe->defparam.ilen;
 	}
 }
 
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-		struct arch_uprobe_task *autask;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
-		autask = &current->utask->autask;
-		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-			regs->ax = autask->saved_scratch_register;
-		else
-			regs->cx = autask->saved_scratch_register;
-
-		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Caller may need to apply other fixups to handle stuff
-		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
-		 */
-		if (correction)
-			*correction += 4;
+		*sr = utask->autask.saved_scratch_register;
 	}
 }
-
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
-{
-	insn_init(insn, auprobe->insn, true);
-
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
-	if (is_prefix_bad(insn))
-		return -ENOTSUPP;
-
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-		return 0;
-
-	if (insn->opcode.nbytes == 2) {
-		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-			return 0;
-	}
-	return -ENOTSUPP;
-}
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
-{
-	if (mm->context.ia32_compat)
-		return validate_insn_32bits(auprobe, insn);
-	return validate_insn_64bits(auprobe, insn);
-}
 #else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+	return false;
+}
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-					long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,  struct insn *insn)
-{
-	return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
 	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
 	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
 	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
+	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
 };
 
 static inline int sizeof_long(void)
@@ -415,50 +462,67 @@
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+	riprel_pre_xol(auprobe, regs);
+	return 0;
+}
+
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+{
+	unsigned long new_sp = regs->sp - sizeof_long();
+
+	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
+		return -EFAULT;
+
+	regs->sp = new_sp;
 	return 0;
 }
 
 /*
- * Adjust the return address pushed by a call insn executed out of line.
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction.  We need
+ * to make it relative to the original instruction (FIX_IP).  Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction.  We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
  */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	int rasize = sizeof_long();
-	long ra;
+	struct uprobe_task *utask = current->utask;
 
-	if (copy_from_user(&ra, (void __user *)sp, rasize))
-		return -EFAULT;
-
-	ra += correction;
-	if (copy_to_user((void __user *)sp, &ra, rasize))
-		return -EFAULT;
+	riprel_post_xol(auprobe, regs);
+	if (auprobe->defparam.fixups & UPROBE_FIX_IP) {
+		long correction = utask->vaddr - utask->xol_vaddr;
+		regs->ip += correction;
+	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
+		regs->sp += sizeof_long(); /* Pop incorrect return address */
+		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+			return -ERESTART;
+	}
+	/* popf; tell the caller to not touch TF */
+	if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
+		utask->autask.saved_tf = true;
 
 	return 0;
 }
 
-static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	struct uprobe_task *utask = current->utask;
-	long correction = (long)(utask->vaddr - utask->xol_vaddr);
-
-	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->fixups & UPROBE_FIX_IP)
-		regs->ip += correction;
-
-	if (auprobe->fixups & UPROBE_FIX_CALL) {
-		if (adjust_ret_addr(regs->sp, correction)) {
-			regs->sp += sizeof_long();
-			return -ERESTART;
-		}
-	}
-
-	return 0;
+	riprel_post_xol(auprobe, regs);
 }
 
 static struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
+	.abort	  = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +584,6 @@
 	unsigned long offs = (long)auprobe->branch.offs;
 
 	if (branch_is_call(auprobe)) {
-		unsigned long new_sp = regs->sp - sizeof_long();
 		/*
 		 * If it fails we execute this (mangled, see the comment in
 		 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
 		 */
-		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+		if (push_ret_address(regs, new_ip))
 			return false;
-		regs->sp = new_sp;
 	} else if (!check_jmp_cond(auprobe, regs)) {
 		offs = 0;
 	}
@@ -583,11 +645,7 @@
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
-
-	/* has the side-effect of processing the entire instruction */
-	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
-		return -ENOEXEC;
+	int i;
 
 	switch (opc1) {
 	case 0xeb:	/* jmp 8 */
@@ -612,6 +670,16 @@
 			return -ENOSYS;
 	}
 
+	/*
+	 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
+	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
+	 * No one uses these insns; reject any branch insn with such a prefix.
+	 */
+	for (i = 0; i < insn->prefixes.nbytes; i++) {
+		if (insn->prefixes.bytes[i] == 0x66)
+			return -ENOTSUPP;
+	}
+
 	auprobe->branch.opc1 = opc1;
 	auprobe->branch.ilen = insn->length;
 	auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@
 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
 	struct insn insn;
-	bool fix_ip = true, fix_call = false;
+	u8 fix_ip_or_call = UPROBE_FIX_IP;
 	int ret;
 
-	ret = validate_insn_bits(auprobe, mm, &insn);
+	ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
 	if (ret)
 		return ret;
 
@@ -642,44 +710,39 @@
 		return ret;
 
 	/*
-	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
-	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
-	 * is either zero or it reflects rip-related fixups.
+	 * Figure out which fixups default_post_xol_op() will need to perform,
+	 * and annotate defparam->fixups accordingly.
 	 */
 	switch (OPCODE1(&insn)) {
 	case 0x9d:		/* popf */
-		auprobe->fixups |= UPROBE_FIX_SETF;
+		auprobe->defparam.fixups |= UPROBE_FIX_SETF;
 		break;
 	case 0xc3:		/* ret or lret -- ip is correct */
 	case 0xcb:
 	case 0xc2:
 	case 0xca:
-		fix_ip = false;
+	case 0xea:		/* jmp absolute -- ip is correct */
+		fix_ip_or_call = 0;
 		break;
 	case 0x9a:		/* call absolute - Fix return addr, not ip */
-		fix_call = true;
-		fix_ip = false;
-		break;
-	case 0xea:		/* jmp absolute -- ip is correct */
-		fix_ip = false;
+		fix_ip_or_call = UPROBE_FIX_CALL;
 		break;
 	case 0xff:
-		insn_get_modrm(&insn);
 		switch (MODRM_REG(&insn)) {
 		case 2: case 3:			/* call or lcall, indirect */
-			fix_call = true;
+			fix_ip_or_call = UPROBE_FIX_CALL;
+			break;
 		case 4: case 5:			/* jmp or ljmp, indirect */
-			fix_ip = false;
+			fix_ip_or_call = 0;
+			break;
 		}
 		/* fall through */
 	default:
-		handle_riprel_insn(auprobe, &insn);
+		riprel_analyze(auprobe, &insn);
 	}
 
-	if (fix_ip)
-		auprobe->fixups |= UPROBE_FIX_IP;
-	if (fix_call)
-		auprobe->fixups |= UPROBE_FIX_CALL;
+	auprobe->defparam.ilen = insn.length;
+	auprobe->defparam.fixups |= fix_ip_or_call;
 
 	auprobe->ops = &default_xol_ops;
 	return 0;
@@ -694,6 +757,12 @@
 {
 	struct uprobe_task *utask = current->utask;
 
+	if (auprobe->ops->pre_xol) {
+		int err = auprobe->ops->pre_xol(auprobe, regs);
+		if (err)
+			return err;
+	}
+
 	regs->ip = utask->xol_vaddr;
 	utask->autask.saved_trap_nr = current->thread.trap_nr;
 	current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +772,6 @@
 	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
 		set_task_blockstep(current, false);
 
-	if (auprobe->ops->pre_xol)
-		return auprobe->ops->pre_xol(auprobe, regs);
 	return 0;
 }
 
@@ -732,56 +799,42 @@
  * single-step, we single-stepped a copy of the instruction.
  *
  * This function prepares to resume execution after the single-step.
- * We have to fix things up as follows:
- *
- * Typically, the new ip is relative to the copied instruction.  We need
- * to make it relative to the original instruction (FIX_IP).  Exceptions
- * are return instructions and absolute or indirect jump or call instructions.
- *
- * If the single-stepped instruction was a call, the return address that
- * is atop the stack is the address following the copied instruction.  We
- * need to make it the address following the original instruction (FIX_CALL).
- *
- * If the original instruction was a rip-relative instruction such as
- * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
- * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
- * We need to restore the contents of the scratch register and adjust
- * the ip, keeping in mind that the instruction we executed is 4 bytes
- * shorter than the original instruction (since we squeezed out the offset
- * field).  (FIX_RIP_AX or FIX_RIP_CX)
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
+	bool send_sigtrap = utask->autask.saved_tf;
+	int err = 0;
 
 	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
 
 	if (auprobe->ops->post_xol) {
-		int err = auprobe->ops->post_xol(auprobe, regs);
+		err = auprobe->ops->post_xol(auprobe, regs);
 		if (err) {
-			arch_uprobe_abort_xol(auprobe, regs);
 			/*
-			 * Restart the probed insn. ->post_xol() must ensure
-			 * this is really possible if it returns -ERESTART.
+			 * Restore ->ip for restart or post mortem analysis.
+			 * ->post_xol() must not return -ERESTART unless this
+			 * is really possible.
 			 */
+			regs->ip = utask->vaddr;
 			if (err == -ERESTART)
-				return 0;
-			return err;
+				err = 0;
+			send_sigtrap = false;
 		}
 	}
-
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
 	/*
 	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
 	 * so we can get an extra SIGTRAP if we do not clear TF. We need
 	 * to examine the opcode to make it right.
 	 */
-	if (utask->autask.saved_tf)
+	if (send_sigtrap)
 		send_sig(SIGTRAP, current, 0);
-	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+
+	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	return 0;
+	return err;
 }
 
 /* callback routine for handling exceptions. */
@@ -815,18 +868,18 @@
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
- * Reset the instruction pointer to its probed address for the potential
- * restart or for post mortem analysis.
+ * the thread has a fatal signal. Reset the instruction pointer to its
+ * probed address for the potential restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
 
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
-	handle_riprel_post_xol(auprobe, regs, NULL);
-	instruction_pointer_set(regs, utask->vaddr);
+	if (auprobe->ops->abort)
+		auprobe->ops->abort(auprobe, regs);
 
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
+	regs->ip = utask->vaddr;
 	/* clear TF if it was set by us in arch_uprobe_pre_xol() */
 	if (!utask->autask.saved_tf)
 		regs->flags &= ~X86_EFLAGS_TF;
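
The scratch-register rewrite in riprel_analyze() can be checked by hand:
for "89 05 disp32" (mov %eax,disp32(%rip)), modrm.reg is 0, neither reg
nor vex.vvvv claims %rsi, so reg2 = 6 is chosen and the modrm byte
becomes 0x80 | (0 << 3) | 6 = 0x86, i.e. disp32(%rsi). A standalone
sketch of that arithmetic (userspace, illustrative only):

	#include <stdio.h>

	int main(void)
	{
		unsigned char modrm = 0x05;		/* 00 000 101: rip-relative */
		unsigned char reg   = (modrm >> 3) & 7;	/* modrm.reg field */
		unsigned char reg2  = 6;		/* %rsi picked as scratch */

		/* Same transformation as riprel_analyze():
		 * "00 reg 101" -> "10 reg reg2" */
		unsigned char fixed = 0x80 | (reg << 3) | reg2;

		printf("%02x -> %02x\n", modrm, fixed);	/* prints "05 -> 86" */
		return 0;
	}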
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 2930ae0..28f85c91 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -4,8 +4,8 @@
  *  (inspired by Andi Kleen's thunk_64.S)
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */
-
 	#include <linux/linkage.h>
+	#include <asm/asm.h>
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 	/* put return address in eax (arg1) */
@@ -22,6 +22,7 @@
 	popl %ecx
 	popl %eax
 	ret
+	_ASM_NOKPROBE(\name)
 	.endm
 
 	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index a63efd6..92d9fea 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
+#include <asm/asm.h>
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -25,6 +26,7 @@
 	call \func
 	jmp  restore
 	CFI_ENDPROC
+	_ASM_NOKPROBE(\name)
 	.endm
 
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -43,3 +45,4 @@
 	RESTORE_ARGS
 	ret
 	CFI_ENDPROC
+	_ASM_NOKPROBE(restore)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 858b47b..3664279 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -8,7 +8,7 @@
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/module.h>		/* search_exception_table	*/
 #include <linux/bootmem.h>		/* max_low_pfn			*/
-#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
@@ -46,7 +46,7 @@
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int __kprobes
+static nokprobe_inline int
 kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 	if (unlikely(is_kmmio_active()))
@@ -55,7 +55,7 @@
 	return 0;
 }
 
-static inline int __kprobes kprobes_fault(struct pt_regs *regs)
+static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -262,7 +262,7 @@
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
@@ -292,6 +292,7 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 /*
  * Did it hit the DOS screen memory VA from vm86 mode?
@@ -359,7 +360,7 @@
  *
  * This assumes no large pages in there.
  */
-static noinline __kprobes int vmalloc_fault(unsigned long address)
+static noinline int vmalloc_fault(unsigned long address)
 {
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
@@ -426,6 +427,7 @@
 
 	return 0;
 }
+NOKPROBE_SYMBOL(vmalloc_fault);
 
 #ifdef CONFIG_CPU_SUP_AMD
 static const char errata93_warning[] =
@@ -928,7 +930,7 @@
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline __kprobes int
+static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
@@ -976,6 +978,7 @@
 
 	return ret;
 }
+NOKPROBE_SYMBOL(spurious_fault);
 
 int show_unhandled_signals = 1;
 
@@ -1031,7 +1034,7 @@
  * {,trace_}do_page_fault() have notrace on. Having this an actual function
  * guarantees there's a function trace entry.
  */
-static void __kprobes noinline
+static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address)
 {
@@ -1254,8 +1257,9 @@
 
 	up_read(&mm->mmap_sem);
 }
+NOKPROBE_SYMBOL(__do_page_fault);
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	unsigned long address = read_cr2(); /* Get the faulting address */
@@ -1273,10 +1277,12 @@
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(do_page_fault);
 
 #ifdef CONFIG_TRACING
-static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-				     unsigned long error_code)
+static nokprobe_inline void
+trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+			 unsigned long error_code)
 {
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
@@ -1284,7 +1290,7 @@
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void __kprobes notrace
+dotraplinkage void notrace
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	/*
@@ -1301,4 +1307,5 @@
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+NOKPROBE_SYMBOL(trace_do_page_fault);
 #endif /* CONFIG_TRACING */
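
The fault-handling conversion follows the same pattern as traps.c: the
__kprobes section annotation is replaced by NOKPROBE_SYMBOL(), which
records the function's address in a blacklist table instead of moving
the function to a special section, and nokprobe_inline keeps small
helpers out of kprobes' reach even when gcc chooses not to inline them.
A minimal sketch of the pattern (the function names are made up for
illustration):

	static nokprobe_inline int demo_check(struct pt_regs *regs)
	{
		return 0;			/* real work elided */
	}

	static noinline int demo_fault(unsigned long address)
	{
		return demo_check(NULL);	/* may run with kprobes armed */
	}
	NOKPROBE_SYMBOL(demo_fault);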
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 9769df0..3c0809a 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -9,18 +9,9 @@
 VDSO32-$(CONFIG_X86_32)		:= y
 VDSO32-$(CONFIG_COMPAT)		:= y
 
-vdso-install-$(VDSO64-y)	+= vdso.so
-vdso-install-$(VDSOX32-y)	+= vdsox32.so
-vdso-install-$(VDSO32-y)	+= $(vdso32-images)
-
-
 # files to link into the vdso
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
-
-vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
-
-# Filter out x32 objects.
-vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
+vobjs-nox32 := vdso-fakesections.o
 
 # files to link into kernel
 obj-y				+= vma.o
@@ -34,7 +25,7 @@
 
 obj-$(VDSO32-y)			+= vdso32-setup.o
 
-vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
 
 $(obj)/vdso.o: $(obj)/vdso.so
 
@@ -104,7 +95,13 @@
 			   -Wl,-z,max-page-size=4096 \
 			   -Wl,-z,common-page-size=4096
 
-vobjx32s-y := $(vobj64s:.o=-x32.o)
+# 64-bit objects to re-brand as x32
+vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
+
+# x32-rebranded versions
+vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+
+# same thing, but in the output directory
 vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
 
 # Convert 64bit object file to x32 for x32 vDSO.
@@ -176,15 +173,20 @@
 GCOV_PROFILE := n
 
 #
-# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+# Install the unstripped copies of vdso*.so.
 #
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+      cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%)
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
 	@mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 	$(call cmd,vdso_install)
 
-PHONY += vdso_install $(vdso-install-y)
-vdso_install: $(vdso-install-y)
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
 
 clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
new file mode 100644
index 0000000..cb8a8d7
--- /dev/null
+++ b/arch/x86/vdso/vdso-fakesections.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014 Andy Lutomirski
+ * Subject to the GNU Public License, v.2
+ *
+ * Hack to keep broken Go programs working.
+ *
+ * The Go runtime had a couple of bugs: it would read the section table to try
+ * to figure out how many dynamic symbols there were (it shouldn't have looked
+ * at the section table at all) and, if there were no SHT_SYNDYM section table
+ * entry, it would use an uninitialized value for the number of symbols.  As a
+ * workaround, we supply a minimal section table.  vdso2c will adjust the
+ * in-memory image so that "vdso_fake_sections" becomes the section table.
+ *
+ * The bug was introduced by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31)
+ * and is being addressed in the Go runtime in this issue:
+ * https://code.google.com/p/go/issues/detail?id=8197
+ */
+
+#ifndef __x86_64__
+#error This hack is specific to the 64-bit vDSO
+#endif
+
+#include <linux/elf.h>
+
+extern const __visible struct elf64_shdr vdso_fake_sections[];
+const __visible struct elf64_shdr vdso_fake_sections[] = {
+	{
+		.sh_type = SHT_DYNSYM,
+		.sh_entsize = sizeof(Elf64_Sym),
+	}
+};
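
The fake table only has to satisfy a consumer that walks the section
headers looking for SHT_DYNSYM. A sketch of what such a buggy, Go
runtime-style consumer effectively does, and why one well-formed entry
is enough:

	#include <elf.h>

	static unsigned long count_dynsyms(const Elf64_Ehdr *hdr, const char *image)
	{
		const Elf64_Shdr *sh = (const Elf64_Shdr *)(image + hdr->e_shoff);
		int i;

		for (i = 0; i < hdr->e_shnum; i++)
			if (sh[i].sh_type == SHT_DYNSYM && sh[i].sh_entsize)
				/* sh_size is 0 in the fake entry above, so the
				 * count is 0, but at least it is initialized. */
				return sh[i].sh_size / sh[i].sh_entsize;
		return 0;
	}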
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 450ac6e..7a6bf50 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -54,7 +54,7 @@
 }
 
 /*
- * Evil macros to do a little-endian read.
+ * Evil macros for little-endian reads and writes
  */
 #define GLE(x, bits, ifnot)						\
 	__builtin_choose_expr(						\
@@ -62,11 +62,24 @@
 		(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
 
 extern void bad_get_le(void);
-#define LAST_LE(x)							\
+#define LAST_GLE(x)							\
 	__builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
 
 #define GET_LE(x)							\
-	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x))))
+	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))
+
+#define PLE(x, val, bits, ifnot)					\
+	__builtin_choose_expr(						\
+		(sizeof(*(x)) == bits/8),				\
+		put_unaligned_le##bits((val), (x)), ifnot)
+
+extern void bad_put_le(void);
+#define LAST_PLE(x, val)						\
+	__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())
+
+#define PUT_LE(x, val)					\
+	PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))
+
 
 #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
 
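
PUT_LE mirrors GET_LE: __builtin_choose_expr picks the
put_unaligned_le{16,32,64}() variant from the pointee's size at compile
time, so header fields of any width can be patched with one spelling.
A sketch of the pairing, assuming hdr points at the ELF header as in
vdso2c.h:

	Elf64_Half shnum = GET_LE(&hdr->e_shnum);	/* endian-safe read  */
	PUT_LE(&hdr->e_shnum, shnum);			/* endian-safe write */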
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index 8a07463..c6eefaf 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -18,6 +18,8 @@
 	const char *secstrings;
 	uint64_t syms[NSYMS] = {};
 
+	uint64_t fake_sections_value = 0, fake_sections_size = 0;
+
 	Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff));
 
 	/* Walk the segment table. */
@@ -84,6 +86,7 @@
 			GET_LE(&symtab_hdr->sh_entsize) * i;
 		const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
 			GET_LE(&sym->st_name);
+
 		for (k = 0; k < NSYMS; k++) {
 			if (!strcmp(name, required_syms[k])) {
 				if (syms[k]) {
@@ -93,6 +96,13 @@
 				syms[k] = GET_LE(&sym->st_value);
 			}
 		}
+
+		if (!strcmp(name, "vdso_fake_sections")) {
+			if (fake_sections_value)
+				fail("duplicate vdso_fake_sections\n");
+			fake_sections_value = GET_LE(&sym->st_value);
+			fake_sections_size = GET_LE(&sym->st_size);
+		}
 	}
 
 	/* Validate mapping addresses. */
@@ -112,11 +122,14 @@
 	if (syms[sym_end_mapping] % 4096)
 		fail("end_mapping must be a multiple of 4096\n");
 
-	/* Remove sections. */
-	hdr->e_shoff = 0;
-	hdr->e_shentsize = 0;
-	hdr->e_shnum = 0;
-	hdr->e_shstrndx = htole16(SHN_UNDEF);
+	/* Remove sections or use fakes */
+	if (fake_sections_size % sizeof(Elf_Shdr))
+		fail("vdso_fake_sections size is not a multiple of %ld\n",
+		     (long)sizeof(Elf_Shdr));
+	PUT_LE(&hdr->e_shoff, fake_sections_value);
+	PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0);
+	PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr));
+	PUT_LE(&hdr->e_shstrndx, SHN_UNDEF);
 
 	if (!name) {
 		fwrite(addr, load_size, 1, outfile);
diff --git a/block/blk-core.c b/block/blk-core.c
index 9aca8c7..f6f6b9a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -43,6 +43,7 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a842c71..02351e2 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/nvme.h>
@@ -46,16 +42,26 @@
 #include <scsi/sg.h>
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
 
-#define NVME_Q_DEPTH 1024
+#include <trace/events/block.h>
+
+#define NVME_Q_DEPTH		1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT	(60 * HZ)
-#define IOD_TIMEOUT	(4 * NVME_IO_TIMEOUT)
+#define ADMIN_TIMEOUT		(admin_timeout * HZ)
+#define IOD_TIMEOUT		(retry_time * HZ)
 
-unsigned char io_timeout = 30;
-module_param(io_timeout, byte, 0644);
+static unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 
+static unsigned char retry_time = 30;
+module_param(retry_time, byte, 0644);
+MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O");
+
 static int nvme_major;
 module_param(nvme_major, int, 0);
 
@@ -67,6 +73,7 @@
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
+static struct notifier_block nvme_nb;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
 
@@ -199,16 +206,13 @@
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
-#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
-#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
+#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
 		return;
-	if (ctx == CMD_CTX_FLUSH)
-		return;
 	if (ctx == CMD_CTX_ABORT) {
 		++nvmeq->dev->abort_limit;
 		return;
@@ -247,8 +251,9 @@
 	void *ctx;
 	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
 
-	if (cmdid >= nvmeq->q_depth) {
-		*fn = special_completion;
+	if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) {
+		if (fn)
+			*fn = special_completion;
 		return CMD_CTX_INVALID;
 	}
 	if (fn)
@@ -281,9 +286,17 @@
 
 static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
 {
+	struct nvme_queue *nvmeq;
 	unsigned queue_id = get_cpu_var(*dev->io_queue);
+
 	rcu_read_lock();
-	return rcu_dereference(dev->queues[queue_id]);
+	nvmeq = rcu_dereference(dev->queues[queue_id]);
+	if (nvmeq)
+		return nvmeq;
+
+	rcu_read_unlock();
+	put_cpu_var(*dev->io_queue);
+	return NULL;
 }
 
 static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -295,8 +308,15 @@
 static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
 							__acquires(RCU)
 {
+	struct nvme_queue *nvmeq;
+
 	rcu_read_lock();
-	return rcu_dereference(dev->queues[q_idx]);
+	nvmeq = rcu_dereference(dev->queues[q_idx]);
+	if (nvmeq)
+		return nvmeq;
+
+	rcu_read_unlock();
+	return NULL;
 }
 
 static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@ -387,25 +407,30 @@
 static void nvme_start_io_acct(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	const int rw = bio_data_dir(bio);
-	int cpu = part_stat_lock();
-	part_round_stats(cpu, &disk->part0);
-	part_stat_inc(cpu, &disk->part0, ios[rw]);
-	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
-	part_inc_in_flight(&disk->part0, rw);
-	part_stat_unlock();
+	if (blk_queue_io_stat(disk->queue)) {
+		const int rw = bio_data_dir(bio);
+		int cpu = part_stat_lock();
+		part_round_stats(cpu, &disk->part0);
+		part_stat_inc(cpu, &disk->part0, ios[rw]);
+		part_stat_add(cpu, &disk->part0, sectors[rw],
+							bio_sectors(bio));
+		part_inc_in_flight(&disk->part0, rw);
+		part_stat_unlock();
+	}
 }
 
 static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	const int rw = bio_data_dir(bio);
-	unsigned long duration = jiffies - start_time;
-	int cpu = part_stat_lock();
-	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
-	part_round_stats(cpu, &disk->part0);
-	part_dec_in_flight(&disk->part0, rw);
-	part_stat_unlock();
+	if (blk_queue_io_stat(disk->queue)) {
+		const int rw = bio_data_dir(bio);
+		unsigned long duration = jiffies - start_time;
+		int cpu = part_stat_lock();
+		part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+		part_round_stats(cpu, &disk->part0);
+		part_dec_in_flight(&disk->part0, rw);
+		part_stat_unlock();
+	}
 }
 
 static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -414,6 +439,7 @@
 	struct nvme_iod *iod = ctx;
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+	int error = 0;
 
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR ||
@@ -426,6 +452,7 @@
 			wake_up(&nvmeq->sq_full);
 			return;
 		}
+		error = -EIO;
 	}
 	if (iod->nents) {
 		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
@@ -433,10 +460,9 @@
 		nvme_end_io_acct(bio, iod->start_time);
 	}
 	nvme_free_iod(nvmeq->dev, iod);
-	if (status)
-		bio_endio(bio, -EIO);
-	else
-		bio_endio(bio, 0);
+
+	trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error);
+	bio_endio(bio, error);
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
@@ -525,6 +551,8 @@
 	if (!split)
 		return -ENOMEM;
 
+	trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+					split->bi_iter.bi_sector);
 	bio_chain(split, bio);
 
 	if (!waitqueue_active(&nvmeq->sq_full))
@@ -627,16 +655,6 @@
 	return 0;
 }
 
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
-{
-	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					special_completion, NVME_IO_TIMEOUT);
-	if (unlikely(cmdid < 0))
-		return cmdid;
-
-	return nvme_submit_flush(nvmeq, ns, cmdid);
-}
-
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
 	struct bio *bio = iod->private;
@@ -652,7 +670,7 @@
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+	if (bio->bi_rw & REQ_FLUSH)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -686,6 +704,26 @@
 	return 0;
 }
 
+static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+{
+	struct bio *split = bio_clone(bio, GFP_ATOMIC);
+	if (!split)
+		return -ENOMEM;
+
+	split->bi_iter.bi_size = 0;
+	split->bi_phys_segments = 0;
+	bio->bi_rw &= ~REQ_FLUSH;
+	bio_chain(split, bio);
+
+	if (!waitqueue_active(&nvmeq->sq_full))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held.  May not sleep.
  */
@@ -696,11 +734,8 @@
 	int psegs = bio_phys_segments(ns->queue, bio);
 	int result;
 
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
+	if ((bio->bi_rw & REQ_FLUSH) && psegs)
+		return nvme_split_flush_data(nvmeq, bio);
 
 	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
@@ -795,7 +830,6 @@
 	int result = -EBUSY;
 
 	if (!nvmeq) {
-		put_nvmeq(NULL);
 		bio_endio(bio, -EIO);
 		return;
 	}
@@ -870,10 +904,8 @@
 	struct nvme_queue *nvmeq;
 
 	nvmeq = lock_nvmeq(dev, q_idx);
-	if (!nvmeq) {
-		unlock_nvmeq(nvmeq);
+	if (!nvmeq)
 		return -ENODEV;
-	}
 
 	cmdinfo.task = current;
 	cmdinfo.status = -EINTR;
@@ -898,9 +930,10 @@
 
 	if (cmdinfo.status == -EINTR) {
 		nvmeq = lock_nvmeq(dev, q_idx);
-		if (nvmeq)
+		if (nvmeq) {
 			nvme_abort_command(nvmeq, cmdid);
-		unlock_nvmeq(nvmeq);
+			unlock_nvmeq(nvmeq);
+		}
 		return -EINTR;
 	}
 
@@ -1358,7 +1391,8 @@
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
 			dev_err(&dev->pci_dev->dev,
-				"Device not ready; aborting initialisation\n");
+				"Device not ready; aborting %s\n", enabled ?
+						"initialisation" : "reset");
 			return -ENODEV;
 		}
 	}
@@ -1481,7 +1515,11 @@
 		goto put_pages;
 	}
 
+	err = -ENOMEM;
 	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+	if (!iod)
+		goto put_pages;
+
 	sg = iod->sg;
 	sg_init_table(sg, count);
 	for (i = 0; i < count; i++) {
@@ -1494,7 +1532,6 @@
 	sg_mark_end(&sg[i - 1]);
 	iod->nents = count;
 
-	err = -ENOMEM;
 	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
 				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (!nents)
@@ -1894,6 +1931,8 @@
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -2062,8 +2101,13 @@
 
 	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
 								&result);
-	if (status)
-		return status < 0 ? -EIO : -EBUSY;
+	if (status < 0)
+		return status;
+	if (status > 0) {
+		dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
+									status);
+		return -EBUSY;
+	}
 	return min(result & 0xffff, result >> 16) + 1;
 }
 
@@ -2072,14 +2116,25 @@
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
 }
 
+static void nvme_cpu_workfn(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
+	if (dev->initialized)
+		nvme_assign_io_queues(dev);
+}
+
 static int nvme_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
-	struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
+	struct nvme_dev *dev;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-		nvme_assign_io_queues(dev);
+		spin_lock(&dev_list_lock);
+		list_for_each_entry(dev, &dev_list, node)
+			schedule_work(&dev->cpu_work);
+		spin_unlock(&dev_list_lock);
 		break;
 	}
 	return NOTIFY_OK;
@@ -2148,11 +2203,6 @@
 	nvme_free_queues(dev, nr_io_queues + 1);
 	nvme_assign_io_queues(dev);
 
-	dev->nb.notifier_call = &nvme_cpu_notify;
-	result = register_hotcpu_notifier(&dev->nb);
-	if (result)
-		goto free_queues;
-
 	return 0;
 
  free_queues:
@@ -2184,6 +2234,7 @@
 
 	res = nvme_identify(dev, 0, 1, dma_addr);
 	if (res) {
+		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
 		res = -EIO;
 		goto out;
 	}
@@ -2192,6 +2243,7 @@
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
+	dev->vwc = ctrl->vwc;
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2450,8 +2502,6 @@
 	int i;
 
 	dev->initialized = 0;
-	unregister_hotcpu_notifier(&dev->nb);
-
 	nvme_dev_list_remove(dev);
 
 	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@ -2722,6 +2772,7 @@
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+	INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
 	dev->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
@@ -2801,6 +2852,7 @@
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->cpu_work);
 	misc_deregister(&dev->miscdev);
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
@@ -2889,11 +2941,18 @@
 	else if (result > 0)
 		nvme_major = result;
 
-	result = pci_register_driver(&nvme_driver);
+	nvme_nb.notifier_call = &nvme_cpu_notify;
+	result = register_hotcpu_notifier(&nvme_nb);
 	if (result)
 		goto unregister_blkdev;
+
+	result = pci_register_driver(&nvme_driver);
+	if (result)
+		goto unregister_hotcpu;
 	return 0;
 
+ unregister_hotcpu:
+	unregister_hotcpu_notifier(&nvme_nb);
  unregister_blkdev:
 	unregister_blkdev(nvme_major, "nvme");
  kill_workq:
@@ -2904,9 +2963,11 @@
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
+	unregister_hotcpu_notifier(&nvme_nb);
 	unregister_blkdev(nvme_major, "nvme");
 	destroy_workqueue(nvme_workq);
 	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
+	_nvme_check_size();
 }
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 2c3f5be..a4cd6d6 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1,6 +1,6 @@
 /*
  * NVM Express device driver
- * Copyright (c) 2011, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 /*
@@ -243,8 +239,6 @@
 #define READ_CAP_16_RESP_SIZE				32
 
 /* NVMe Namespace and Command Defines */
-#define NVME_GET_SMART_LOG_PAGE				0x02
-#define NVME_GET_FEAT_TEMP_THRESH			0x04
 #define BYTES_TO_DWORDS					4
 #define NVME_MAX_FIRMWARE_SLOT				7
 
@@ -686,6 +680,7 @@
 	u8 resp_data_format = 0x02;
 	u8 protect;
 	u8 cmdque = 0x01 << 1;
+	u8 fw_offset = sizeof(dev->firmware_rev);
 
 	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
 				&dma_addr, GFP_KERNEL);
@@ -721,7 +716,11 @@
 	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
 	strncpy(&inq_response[8], "NVMe    ", 8);
 	strncpy(&inq_response[16], dev->model, 16);
-	strncpy(&inq_response[32], dev->firmware_rev, 4);
+
+	while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+		fw_offset--;
+	fw_offset -= 4;
+	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -1018,8 +1017,8 @@
 	c.common.opcode = nvme_admin_get_log_page;
 	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
 	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
+	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
+			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
 	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
@@ -1086,8 +1085,8 @@
 	c.common.opcode = nvme_admin_get_log_page;
 	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
 	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE);
+	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
+			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
 	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
@@ -1477,7 +1476,7 @@
 		goto out_dma;
 	}
 	id_ctrl = mem;
-	lowest_pow_st = id_ctrl->npss - 1;
+	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
 
 	switch (pc) {
 	case NVME_POWER_STATE_START_VALID:
@@ -1494,20 +1493,19 @@
 		break;
 	case NVME_POWER_STATE_IDLE:
 		/* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */
-		/* min of desired state and (lps-1) because lps is STOP */
 		if (pcmod == 0x0)
-			ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_1;
 		else if (pcmod == 0x1)
-			ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_2;
 		else if (pcmod == 0x2)
-			ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1));
+			ps_desired = POWER_STATE_3;
 		break;
 	case NVME_POWER_STATE_STANDBY:
 		/* Action unspecified if POWER CONDITION MODIFIER != [0,1] */
 		if (pcmod == 0x0)
-			ps_desired = max(0, (lowest_pow_st - 2));
+			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2));
 		else if (pcmod == 0x1)
-			ps_desired = max(0, (lowest_pow_st - 1));
+			ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1));
 		break;
 	case NVME_POWER_STATE_LU_CONTROL:
 	default:
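
The firmware-revision hunk in this file works around the SCSI INQUIRY
layout: NVMe reports an eight-byte revision while the INQUIRY
product-revision field holds four bytes, so the code strips trailing
spaces and keeps the last four significant characters. A compilable
rendering of just that trim (the sample strings are invented):

    #include <stdio.h>
    #include <string.h>

    static void pick_fw4(const char fw[8], char out[5])
    {
        size_t off = 8;

        while (off > 4 && fw[off - 1] == ' ')
            off--;                  /* drop trailing padding */
        off -= 4;                   /* keep the last four that remain */
        memcpy(out, fw + off, 4);
        out[4] = '\0';
    }

    int main(void)
    {
        char out[5];

        pick_fw4("8DV10131", out);  /* unpadded: prints "0131" */
        printf("%s\n", out);
        pick_fw4("1.0     ", out);  /* padded: prints "1.0 " */
        printf("%s\n", out);
        return 0;
    }
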
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4c95b50..bbeb404 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -541,7 +541,6 @@
 		return -ENOENT;
 
 	(void) get_device(&rbd_dev->dev);
-	set_device_ro(bdev, rbd_dev->mapping.read_only);
 
 	return 0;
 }
@@ -559,10 +558,76 @@
 	put_device(&rbd_dev->dev);
 }
 
+static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
+{
+	int ret = 0;
+	int val;
+	bool ro;
+	bool ro_changed = false;
+
+	/* get_user() may sleep, so call it before taking rbd_dev->lock */
+	if (get_user(val, (int __user *)(arg)))
+		return -EFAULT;
+
+	ro = val ? true : false;
+	/* Snapshots don't allow writes */
+	if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
+		return -EROFS;
+
+	spin_lock_irq(&rbd_dev->lock);
+	/* prevent others open this device */
+	if (rbd_dev->open_count > 1) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (rbd_dev->mapping.read_only != ro) {
+		rbd_dev->mapping.read_only = ro;
+		ro_changed = true;
+	}
+
+out:
+	spin_unlock_irq(&rbd_dev->lock);
+	/* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */
+	if (ret == 0 && ro_changed)
+		set_disk_ro(rbd_dev->disk, ro ? 1 : 0);
+
+	return ret;
+}
+
+static int rbd_ioctl(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg)
+{
+	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+	int ret = 0;
+
+	switch (cmd) {
+	case BLKROSET:
+		ret = rbd_ioctl_set_ro(rbd_dev, arg);
+		break;
+	default:
+		ret = -ENOTTY;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode,
+				unsigned int cmd, unsigned long arg)
+{
+	return rbd_ioctl(bdev, mode, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
 static const struct block_device_operations rbd_bd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= rbd_open,
 	.release		= rbd_release,
+	.ioctl			= rbd_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl		= rbd_compat_ioctl,
+#endif
 };
 
 /*
@@ -1382,6 +1447,13 @@
 	kref_put(&obj_request->kref, rbd_obj_request_destroy);
 }
 
+static void rbd_img_request_get(struct rbd_img_request *img_request)
+{
+	dout("%s: img %p (was %d)\n", __func__, img_request,
+	     atomic_read(&img_request->kref.refcount));
+	kref_get(&img_request->kref);
+}
+
 static bool img_request_child_test(struct rbd_img_request *img_request);
 static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
@@ -2142,6 +2214,7 @@
 	img_request->next_completion = which;
 out:
 	spin_unlock_irq(&img_request->completion_lock);
+	rbd_img_request_put(img_request);
 
 	if (!more)
 		rbd_img_request_complete(img_request);
@@ -2242,6 +2315,7 @@
 			goto out_unwind;
 		obj_request->osd_req = osd_req;
 		obj_request->callback = rbd_img_obj_callback;
+		rbd_img_request_get(img_request);
 
 		if (write_request) {
 			osd_req_op_alloc_hint_init(osd_req, which,
@@ -2872,56 +2946,55 @@
 }
 
 /*
- * Request sync osd watch/unwatch.  The value of "start" determines
- * whether a watch request is being initiated or torn down.
+ * Initiate a watch request, synchronously.
  */
-static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	int ret;
 
-	rbd_assert(start ^ !!rbd_dev->watch_event);
-	rbd_assert(start ^ !!rbd_dev->watch_request);
+	rbd_assert(!rbd_dev->watch_event);
+	rbd_assert(!rbd_dev->watch_request);
 
-	if (start) {
-		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
-						&rbd_dev->watch_event);
-		if (ret < 0)
-			return ret;
-		rbd_assert(rbd_dev->watch_event != NULL);
-	}
+	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+				     &rbd_dev->watch_event);
+	if (ret < 0)
+		return ret;
 
-	ret = -ENOMEM;
+	rbd_assert(rbd_dev->watch_event);
+
 	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
-							OBJ_REQUEST_NODATA);
-	if (!obj_request)
+					     OBJ_REQUEST_NODATA);
+	if (!obj_request) {
+		ret = -ENOMEM;
 		goto out_cancel;
+	}
 
 	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
 						  obj_request);
-	if (!obj_request->osd_req)
-		goto out_cancel;
+	if (!obj_request->osd_req) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
 
-	if (start)
-		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
-	else
-		ceph_osdc_unregister_linger_request(osdc,
-					rbd_dev->watch_request->osd_req);
+	ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
 
 	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
-				rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
+			      rbd_dev->watch_event->cookie, 0, 1);
 	rbd_osd_req_format_write(obj_request);
 
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
+
 	ret = rbd_obj_request_wait(obj_request);
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
+
 	ret = obj_request->result;
 	if (ret)
-		goto out_cancel;
+		goto out_linger;
 
 	/*
 	 * A watch request is set to linger, so the underlying osd
@@ -2931,36 +3004,84 @@
 	 * it.  We'll drop that reference (below) after we've
 	 * unregistered it.
 	 */
-	if (start) {
-		rbd_dev->watch_request = obj_request;
+	rbd_dev->watch_request = obj_request;
 
-		return 0;
-	}
+	return 0;
 
-	/* We have successfully torn down the watch request */
-
-	rbd_obj_request_put(rbd_dev->watch_request);
-	rbd_dev->watch_request = NULL;
+out_linger:
+	ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req);
+out_put:
+	rbd_obj_request_put(obj_request);
 out_cancel:
-	/* Cancel the event if we're tearing down, or on error */
 	ceph_osdc_cancel_event(rbd_dev->watch_event);
 	rbd_dev->watch_event = NULL;
-	if (obj_request)
-		rbd_obj_request_put(obj_request);
 
 	return ret;
 }
 
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+/*
+ * Tear down a watch request, synchronously.
+ */
+static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
 {
-	return __rbd_dev_header_watch_sync(rbd_dev, true);
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_obj_request *obj_request;
+	int ret;
+
+	rbd_assert(rbd_dev->watch_event);
+	rbd_assert(rbd_dev->watch_request);
+
+	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+					     OBJ_REQUEST_NODATA);
+	if (!obj_request) {
+		ret = -ENOMEM;
+		goto out_cancel;
+	}
+
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+						  obj_request);
+	if (!obj_request->osd_req) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+			      rbd_dev->watch_event->cookie, 0, 0);
+	rbd_osd_req_format_write(obj_request);
+
+	ret = rbd_obj_request_submit(osdc, obj_request);
+	if (ret)
+		goto out_put;
+
+	ret = rbd_obj_request_wait(obj_request);
+	if (ret)
+		goto out_put;
+
+	ret = obj_request->result;
+	if (ret)
+		goto out_put;
+
+	/* We have successfully torn down the watch request */
+
+	ceph_osdc_unregister_linger_request(osdc,
+					    rbd_dev->watch_request->osd_req);
+	rbd_obj_request_put(rbd_dev->watch_request);
+	rbd_dev->watch_request = NULL;
+
+out_put:
+	rbd_obj_request_put(obj_request);
+out_cancel:
+	ceph_osdc_cancel_event(rbd_dev->watch_event);
+	rbd_dev->watch_event = NULL;
+
+	return ret;
 }
 
 static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
 {
 	int ret;
 
-	ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+	ret = __rbd_dev_header_unwatch_sync(rbd_dev);
 	if (ret) {
 		rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
 			 ret);
@@ -3058,7 +3179,6 @@
 		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct rbd_device *rbd_dev = q->queuedata;
-	bool read_only = rbd_dev->mapping.read_only;
 	struct request *rq;
 	int result;
 
@@ -3094,7 +3214,7 @@
 
 		if (write_request) {
 			result = -EROFS;
-			if (read_only)
+			if (rbd_dev->mapping.read_only)
 				goto end_request;
 			rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
 		}
@@ -4683,6 +4803,38 @@
 }
 
 /*
+ * Return pool id (>= 0) or a negative error code.
+ */
+static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
+{
+	u64 newest_epoch;
+	unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
+	int tries = 0;
+	int ret;
+
+again:
+	ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
+	if (ret == -ENOENT && tries++ < 1) {
+		ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
+					       &newest_epoch);
+		if (ret < 0)
+			return ret;
+
+		if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
+			ceph_monc_request_next_osdmap(&rbdc->client->monc);
+			(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
+						     newest_epoch, timeout);
+			goto again;
+		} else {
+			/* the osdmap we have is new enough */
+			return -ENOENT;
+		}
+	}
+
+	return ret;
+}
+
+/*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user.  Internally, that identifier is
  * what's used to specify the names of objects related to the image.
@@ -4752,7 +4904,7 @@
 
 		image_id = ceph_extract_encoded_string(&p, p + ret,
 						NULL, GFP_NOIO);
-		ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+		ret = PTR_ERR_OR_ZERO(image_id);
 		if (!ret)
 			rbd_dev->image_format = 2;
 	} else {
@@ -4907,6 +5059,7 @@
 	if (ret)
 		goto err_out_disk;
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
 	ret = rbd_bus_add_dev(rbd_dev);
 	if (ret)
@@ -5053,7 +5206,6 @@
 	struct rbd_options *rbd_opts = NULL;
 	struct rbd_spec *spec = NULL;
 	struct rbd_client *rbdc;
-	struct ceph_osd_client *osdc;
 	bool read_only;
 	int rc = -ENOMEM;
 
@@ -5075,8 +5227,7 @@
 	}
 
 	/* pick the pool */
-	osdc = &rbdc->client->osdc;
-	rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+	rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
 	if (rc < 0)
 		goto err_out_client;
 	spec->pool_id = (u64)rc;
@@ -5387,6 +5538,7 @@
 
 static void __exit rbd_exit(void)
 {
+	ida_destroy(&rbd_dev_id_ida);
 	rbd_sysfs_cleanup();
 	if (single_major)
 		unregister_blkdev(rbd_major, RBD_DRV_NAME);
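
The new BLKROSET path above is explicit about lock ordering: get_user()
may fault and sleep, so it runs before rbd_dev->lock is taken, and
set_disk_ro() may sleep, so the decision is recorded under the lock but
applied only after it is dropped. A userspace sketch of that shape, with
a pthread mutex standing in for the spinlock and names illustrative:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool read_only;
    static int open_count = 1;

    static int set_ro(bool ro)      /* 'ro' already copied from the user */
    {
        bool changed = false;
        int ret = 0;

        pthread_mutex_lock(&lock);
        if (open_count > 1)
            ret = -16;              /* -EBUSY */
        else if (read_only != ro) {
            read_only = ro;
            changed = true;
        }
        pthread_mutex_unlock(&lock);

        if (!ret && changed)        /* sleeping call, after unlock */
            printf("set_disk_ro(%d)\n", ro);
        return ret;
    }

    int main(void)
    {
        return set_ro(true);
    }
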
diff --git a/drivers/clk/sunxi/Makefile b/drivers/clk/sunxi/Makefile
index b5bac91..762fd64 100644
--- a/drivers/clk/sunxi/Makefile
+++ b/drivers/clk/sunxi/Makefile
@@ -3,3 +3,7 @@
 #
 
 obj-y += clk-sunxi.o clk-factors.o
+obj-y += clk-a10-hosc.o
+obj-y += clk-a20-gmac.o
+
+obj-$(CONFIG_MFD_SUN6I_PRCM) += clk-sun6i-ar100.o clk-sun6i-apb0.o clk-sun6i-apb0-gates.o
diff --git a/drivers/clk/sunxi/clk-a10-hosc.c b/drivers/clk/sunxi/clk-a10-hosc.c
new file mode 100644
index 0000000..0481d5d
--- /dev/null
+++ b/drivers/clk/sunxi/clk-a10-hosc.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2013 Emilio López
+ *
+ * Emilio López <emilio@elopez.com.ar>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#define SUNXI_OSC24M_GATE	0
+
+static DEFINE_SPINLOCK(hosc_lock);
+
+static void __init sun4i_osc_clk_setup(struct device_node *node)
+{
+	struct clk *clk;
+	struct clk_fixed_rate *fixed;
+	struct clk_gate *gate;
+	const char *clk_name = node->name;
+	u32 rate;
+
+	if (of_property_read_u32(node, "clock-frequency", &rate))
+		return;
+
+	/* allocate fixed-rate and gate clock structs */
+	fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL);
+	if (!fixed)
+		return;
+	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
+	if (!gate)
+		goto err_free_fixed;
+
+	of_property_read_string(node, "clock-output-names", &clk_name);
+
+	/* set up gate and fixed rate properties */
+	gate->reg = of_iomap(node, 0);
+	gate->bit_idx = SUNXI_OSC24M_GATE;
+	gate->lock = &hosc_lock;
+	fixed->fixed_rate = rate;
+
+	clk = clk_register_composite(NULL, clk_name,
+			NULL, 0,
+			NULL, NULL,
+			&fixed->hw, &clk_fixed_rate_ops,
+			&gate->hw, &clk_gate_ops,
+			CLK_IS_ROOT);
+
+	if (IS_ERR(clk))
+		goto err_free_gate;
+
+	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	clk_register_clkdev(clk, clk_name, NULL);
+
+	return;
+
+err_free_gate:
+	kfree(gate);
+err_free_fixed:
+	kfree(fixed);
+}
+CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup);
diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c
new file mode 100644
index 0000000..633ddc4
--- /dev/null
+++ b/drivers/clk/sunxi/clk-a20-gmac.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2013 Emilio López
+ * Emilio López <emilio@elopez.com.ar>
+ *
+ * Copyright 2013 Chen-Yu Tsai
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+static DEFINE_SPINLOCK(gmac_lock);
+
+/**
+ * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module
+ *
+ * This clock looks something like this
+ *                               ________________________
+ *  MII TX clock from PHY >-----|___________    _________|----> to GMAC core
+ *  GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY
+ *  Ext. 125MHz RGMII TX clk >--|__divider__/            |
+ *                              |________________________|
+ *
+ * The external 125 MHz reference is optional, i.e. GMAC can use its
+ * internal TX clock just fine. The A31 GMAC clock module does not have
+ * the divider controls for the external reference.
+ *
+ * To keep it simple, let the GMAC use the MII TX clock for MII mode,
+ * and its internal TX clock for GMII and RGMII modes. The GMAC driver should
+ * select the appropriate source and gate/ungate the output to the PHY.
+ *
+ * Only the GMAC should use this clock. Altering the clock so that it doesn't
+ * match the GMAC's operation parameters will result in the GMAC not being
+ * able to send traffic out. The GMAC driver should set the clock rate and
+ * enable/disable this clock to configure the required state. The clock
+ * driver then responds by auto-reparenting the clock.
+ */
+
+#define SUN7I_A20_GMAC_GPIT	2
+#define SUN7I_A20_GMAC_MASK	0x3
+#define SUN7I_A20_GMAC_PARENTS	2
+
+static void __init sun7i_a20_gmac_clk_setup(struct device_node *node)
+{
+	struct clk *clk;
+	struct clk_mux *mux;
+	struct clk_gate *gate;
+	const char *clk_name = node->name;
+	const char *parents[SUN7I_A20_GMAC_PARENTS];
+	void __iomem *reg;
+
+	if (of_property_read_string(node, "clock-output-names", &clk_name))
+		return;
+
+	/* allocate mux and gate clock structs */
+	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
+	if (!mux)
+		return;
+
+	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
+	if (!gate)
+		goto free_mux;
+
+	/* gmac clock requires exactly 2 parents */
+	parents[0] = of_clk_get_parent_name(node, 0);
+	parents[1] = of_clk_get_parent_name(node, 1);
+	if (!parents[0] || !parents[1])
+		goto free_gate;
+
+	reg = of_iomap(node, 0);
+	if (!reg)
+		goto free_gate;
+
+	/* set up gate and fixed rate properties */
+	gate->reg = reg;
+	gate->bit_idx = SUN7I_A20_GMAC_GPIT;
+	gate->lock = &gmac_lock;
+	mux->reg = reg;
+	mux->mask = SUN7I_A20_GMAC_MASK;
+	mux->flags = CLK_MUX_INDEX_BIT;
+	mux->lock = &gmac_lock;
+
+	clk = clk_register_composite(NULL, clk_name,
+			parents, SUN7I_A20_GMAC_PARENTS,
+			&mux->hw, &clk_mux_ops,
+			NULL, NULL,
+			&gate->hw, &clk_gate_ops,
+			0);
+
+	if (IS_ERR(clk))
+		goto iounmap_reg;
+
+	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	clk_register_clkdev(clk, clk_name, NULL);
+
+	return;
+
+iounmap_reg:
+	iounmap(reg);
+free_gate:
+	kfree(gate);
+free_mux:
+	kfree(mux);
+}
+CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk",
+		sun7i_a20_gmac_clk_setup);
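
Per the comment block in this file, the clock driver only reacts; the
GMAC driver is expected to select the TX source by rate and gate the
output itself. A hypothetical consumer-side sketch using the common clk
API; the connection id and the 25/125 MHz rates are assumptions for
illustration, not taken from this patch:

    #include <linux/clk.h>
    #include <linux/err.h>

    static int gmac_tx_clk_setup(struct device *dev, bool rgmii)
    {
        struct clk *tx = devm_clk_get(dev, "allwinner_gmac_tx");

        if (IS_ERR(tx))
            return PTR_ERR(tx);

        /* rate selection makes the clock driver reparent the mux */
        clk_set_rate(tx, rgmii ? 125000000 : 25000000);
        return clk_prepare_enable(tx);
    }
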
diff --git a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
new file mode 100644
index 0000000..44cd27c
--- /dev/null
+++ b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2014 Free Electrons
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Boris BREZILLON <boris.brezillon@free-electrons.com>
+ *
+ * Allwinner A31 APB0 clock gates driver
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#define SUN6I_APB0_GATES_MAX_SIZE	32
+
+static int sun6i_a31_apb0_gates_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct clk_onecell_data *clk_data;
+	const char *clk_parent;
+	const char *clk_name;
+	struct resource *r;
+	void __iomem *reg;
+	int gate_id;
+	int ngates;
+	int i;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	clk_parent = of_clk_get_parent_name(np, 0);
+	if (!clk_parent)
+		return -EINVAL;
+
+	ngates = of_property_count_strings(np, "clock-output-names");
+	if (ngates < 0)
+		return ngates;
+
+	if (!ngates || ngates > SUN6I_APB0_GATES_MAX_SIZE)
+		return -EINVAL;
+
+	clk_data = devm_kzalloc(&pdev->dev, sizeof(struct clk_onecell_data),
+				GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->clks = devm_kzalloc(&pdev->dev,
+				      SUN6I_APB0_GATES_MAX_SIZE *
+				      sizeof(struct clk *),
+				      GFP_KERNEL);
+	if (!clk_data->clks)
+		return -ENOMEM;
+
+	for (i = 0; i < ngates; i++) {
+		of_property_read_string_index(np, "clock-output-names",
+					      i, &clk_name);
+
+		gate_id = i;
+		of_property_read_u32_index(np, "clock-indices", i, &gate_id);
+
+		WARN_ON(gate_id >= SUN6I_APB0_GATES_MAX_SIZE);
+		if (gate_id >= SUN6I_APB0_GATES_MAX_SIZE)
+			continue;
+
+		clk_data->clks[gate_id] = clk_register_gate(&pdev->dev,
+							    clk_name,
+							    clk_parent, 0,
+							    reg, gate_id,
+							    0, NULL);
+		WARN_ON(IS_ERR(clk_data->clks[gate_id]));
+	}
+
+	clk_data->clk_num = ngates;
+
+	return of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
+}
+
+const struct of_device_id sun6i_a31_apb0_gates_clk_dt_ids[] = {
+	{ .compatible = "allwinner,sun6i-a31-apb0-gates-clk" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver sun6i_a31_apb0_gates_clk_driver = {
+	.driver = {
+		.name = "sun6i-a31-apb0-gates-clk",
+		.owner = THIS_MODULE,
+		.of_match_table = sun6i_a31_apb0_gates_clk_dt_ids,
+	},
+	.probe = sun6i_a31_apb0_gates_clk_probe,
+};
+module_platform_driver(sun6i_a31_apb0_gates_clk_driver);
+
+MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("Allwinner A31 APB0 gate clocks driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/sunxi/clk-sun6i-apb0.c b/drivers/clk/sunxi/clk-sun6i-apb0.c
new file mode 100644
index 0000000..11f17c3
--- /dev/null
+++ b/drivers/clk/sunxi/clk-sun6i-apb0.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2014 Free Electrons
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Boris BREZILLON <boris.brezillon@free-electrons.com>
+ *
+ * Allwinner A31 APB0 clock driver
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+/*
+ * The APB0 clk has a configurable divisor.
+ *
+ * We must use a clk_div_table and not a regular power of 2
+ * divisor here, because the first 2 values divide the clock
+ * by 2.
+ */
+static const struct clk_div_table sun6i_a31_apb0_divs[] = {
+	{ .val = 0, .div = 2, },
+	{ .val = 1, .div = 2, },
+	{ .val = 2, .div = 4, },
+	{ .val = 3, .div = 8, },
+	{ /* sentinel */ },
+};
+
+static int sun6i_a31_apb0_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const char *clk_name = np->name;
+	const char *clk_parent;
+	struct resource *r;
+	void __iomem *reg;
+	struct clk *clk;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	clk_parent = of_clk_get_parent_name(np, 0);
+	if (!clk_parent)
+		return -EINVAL;
+
+	of_property_read_string(np, "clock-output-names", &clk_name);
+
+	clk = clk_register_divider_table(&pdev->dev, clk_name, clk_parent,
+					 0, reg, 0, 2, 0, sun6i_a31_apb0_divs,
+					 NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	return of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
+
+const struct of_device_id sun6i_a31_apb0_clk_dt_ids[] = {
+	{ .compatible = "allwinner,sun6i-a31-apb0-clk" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver sun6i_a31_apb0_clk_driver = {
+	.driver = {
+		.name = "sun6i-a31-apb0-clk",
+		.owner = THIS_MODULE,
+		.of_match_table = sun6i_a31_apb0_clk_dt_ids,
+	},
+	.probe = sun6i_a31_apb0_clk_probe,
+};
+module_platform_driver(sun6i_a31_apb0_clk_driver);
+
+MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("Allwinner A31 APB0 clock Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c
new file mode 100644
index 0000000..f73cc05
--- /dev/null
+++ b/drivers/clk/sunxi/clk-sun6i-ar100.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2014 Free Electrons
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Boris BREZILLON <boris.brezillon@free-electrons.com>
+ *
+ * Allwinner A31 AR100 clock driver
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#define SUN6I_AR100_MAX_PARENTS		4
+#define SUN6I_AR100_SHIFT_MASK		0x3
+#define SUN6I_AR100_SHIFT_MAX		SUN6I_AR100_SHIFT_MASK
+#define SUN6I_AR100_SHIFT_SHIFT		4
+#define SUN6I_AR100_DIV_MASK		0x1f
+#define SUN6I_AR100_DIV_MAX		(SUN6I_AR100_DIV_MASK + 1)
+#define SUN6I_AR100_DIV_SHIFT		8
+#define SUN6I_AR100_MUX_MASK		0x3
+#define SUN6I_AR100_MUX_SHIFT		16
+
+struct ar100_clk {
+	struct clk_hw hw;
+	void __iomem *reg;
+};
+
+static inline struct ar100_clk *to_ar100_clk(struct clk_hw *hw)
+{
+	return container_of(hw, struct ar100_clk, hw);
+}
+
+static unsigned long ar100_recalc_rate(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct ar100_clk *clk = to_ar100_clk(hw);
+	u32 val = readl(clk->reg);
+	int shift = (val >> SUN6I_AR100_SHIFT_SHIFT) & SUN6I_AR100_SHIFT_MASK;
+	int div = (val >> SUN6I_AR100_DIV_SHIFT) & SUN6I_AR100_DIV_MASK;
+
+	return (parent_rate >> shift) / (div + 1);
+}
+
+static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *best_parent_rate,
+				 struct clk **best_parent_clk)
+{
+	int nparents = __clk_get_num_parents(hw->clk);
+	long best_rate = -EINVAL;
+	int i;
+
+	*best_parent_clk = NULL;
+
+	for (i = 0; i < nparents; i++) {
+		unsigned long parent_rate;
+		unsigned long tmp_rate;
+		struct clk *parent;
+		unsigned long div;
+		int shift;
+
+		parent = clk_get_parent_by_index(hw->clk, i);
+		parent_rate = __clk_get_rate(parent);
+		div = DIV_ROUND_UP(parent_rate, rate);
+
+		/*
+		 * The AR100 clk contains 2 divisors:
+		 * - one power of 2 divisor
+		 * - one regular divisor
+		 *
+		 * First check if we can safely shift (or divide by a power
+		 * of 2) without losing precision on the requested rate.
+		 */
+		shift = ffs(div) - 1;
+		if (shift > SUN6I_AR100_SHIFT_MAX)
+			shift = SUN6I_AR100_SHIFT_MAX;
+
+		div >>= shift;
+
+		/*
+		 * Then if the divisor is still bigger than what the HW
+		 * actually supports, use a bigger shift (or power of 2
+		 * divider) value and accept losing some precision.
+		 */
+		while (div > SUN6I_AR100_DIV_MAX) {
+			shift++;
+			div >>= 1;
+			if (shift > SUN6I_AR100_SHIFT_MAX)
+				break;
+		}
+
+		/*
+		 * If the shift value (or power of 2 divider) is bigger
+		 * than what the HW actually supports, skip this parent.
+		 */
+		if (shift > SUN6I_AR100_SHIFT_MAX)
+			continue;
+
+		tmp_rate = (parent_rate >> shift) / div;
+		if (!*best_parent_clk || tmp_rate > best_rate) {
+			*best_parent_clk = parent;
+			*best_parent_rate = parent_rate;
+			best_rate = tmp_rate;
+		}
+	}
+
+	return best_rate;
+}
+
+static int ar100_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct ar100_clk *clk = to_ar100_clk(hw);
+	u32 val = readl(clk->reg);
+
+	if (index >= SUN6I_AR100_MAX_PARENTS)
+		return -EINVAL;
+
+	val &= ~(SUN6I_AR100_MUX_MASK << SUN6I_AR100_MUX_SHIFT);
+	val |= (index << SUN6I_AR100_MUX_SHIFT);
+	writel(val, clk->reg);
+
+	return 0;
+}
+
+static u8 ar100_get_parent(struct clk_hw *hw)
+{
+	struct ar100_clk *clk = to_ar100_clk(hw);
+	return (readl(clk->reg) >> SUN6I_AR100_MUX_SHIFT) &
+	       SUN6I_AR100_MUX_MASK;
+}
+
+static int ar100_set_rate(struct clk_hw *hw, unsigned long rate,
+			  unsigned long parent_rate)
+{
+	unsigned long div = parent_rate / rate;
+	struct ar100_clk *clk = to_ar100_clk(hw);
+	u32 val = readl(clk->reg);
+	int shift;
+
+	if (parent_rate % rate)
+		return -EINVAL;
+
+	shift = ffs(div) - 1;
+	if (shift > SUN6I_AR100_SHIFT_MAX)
+		shift = SUN6I_AR100_SHIFT_MAX;
+
+	div >>= shift;
+
+	if (div > SUN6I_AR100_DIV_MAX)
+		return -EINVAL;
+
+	val &= ~((SUN6I_AR100_SHIFT_MASK << SUN6I_AR100_SHIFT_SHIFT) |
+		 (SUN6I_AR100_DIV_MASK << SUN6I_AR100_DIV_SHIFT));
+	val |= (shift << SUN6I_AR100_SHIFT_SHIFT) |
+	       (div << SUN6I_AR100_DIV_SHIFT);
+	writel(val, clk->reg);
+
+	return 0;
+}
+
+static struct clk_ops ar100_ops = {
+	.recalc_rate = ar100_recalc_rate,
+	.determine_rate = ar100_determine_rate,
+	.set_parent = ar100_set_parent,
+	.get_parent = ar100_get_parent,
+	.set_rate = ar100_set_rate,
+};
+
+static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev)
+{
+	const char *parents[SUN6I_AR100_MAX_PARENTS];
+	struct device_node *np = pdev->dev.of_node;
+	const char *clk_name = np->name;
+	struct clk_init_data init;
+	struct ar100_clk *ar100;
+	struct resource *r;
+	struct clk *clk;
+	int nparents;
+	int i;
+
+	ar100 = devm_kzalloc(&pdev->dev, sizeof(*ar100), GFP_KERNEL);
+	if (!ar100)
+		return -ENOMEM;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ar100->reg = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(ar100->reg))
+		return PTR_ERR(ar100->reg);
+
+	nparents = of_clk_get_parent_count(np);
+	if (nparents > SUN6I_AR100_MAX_PARENTS)
+		nparents = SUN6I_AR100_MAX_PARENTS;
+
+	for (i = 0; i < nparents; i++)
+		parents[i] = of_clk_get_parent_name(np, i);
+
+	of_property_read_string(np, "clock-output-names", &clk_name);
+
+	init.name = clk_name;
+	init.ops = &ar100_ops;
+	init.parent_names = parents;
+	init.num_parents = nparents;
+	init.flags = 0;
+
+	ar100->hw.init = &init;
+
+	clk = clk_register(&pdev->dev, &ar100->hw);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	return of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
+
+const struct of_device_id sun6i_a31_ar100_clk_dt_ids[] = {
+	{ .compatible = "allwinner,sun6i-a31-ar100-clk" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver sun6i_a31_ar100_clk_driver = {
+	.driver = {
+		.name = "sun6i-a31-ar100-clk",
+		.owner = THIS_MODULE,
+		.of_match_table = sun6i_a31_ar100_clk_dt_ids,
+	},
+	.probe = sun6i_a31_ar100_clk_probe,
+};
+module_platform_driver(sun6i_a31_ar100_clk_driver);
+
+MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("Allwinner A31 AR100 clock Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 4264834..fb2ce84 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -28,63 +28,6 @@
 #define SUNXI_MAX_PARENTS	5
 
 /**
- * sun4i_osc_clk_setup() - Setup function for gatable oscillator
- */
-
-#define SUNXI_OSC24M_GATE	0
-
-static void __init sun4i_osc_clk_setup(struct device_node *node)
-{
-	struct clk *clk;
-	struct clk_fixed_rate *fixed;
-	struct clk_gate *gate;
-	const char *clk_name = node->name;
-	u32 rate;
-
-	if (of_property_read_u32(node, "clock-frequency", &rate))
-		return;
-
-	/* allocate fixed-rate and gate clock structs */
-	fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL);
-	if (!fixed)
-		return;
-	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
-	if (!gate)
-		goto err_free_fixed;
-
-	of_property_read_string(node, "clock-output-names", &clk_name);
-
-	/* set up gate and fixed rate properties */
-	gate->reg = of_iomap(node, 0);
-	gate->bit_idx = SUNXI_OSC24M_GATE;
-	gate->lock = &clk_lock;
-	fixed->fixed_rate = rate;
-
-	clk = clk_register_composite(NULL, clk_name,
-			NULL, 0,
-			NULL, NULL,
-			&fixed->hw, &clk_fixed_rate_ops,
-			&gate->hw, &clk_gate_ops,
-			CLK_IS_ROOT);
-
-	if (IS_ERR(clk))
-		goto err_free_gate;
-
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
-
-	return;
-
-err_free_gate:
-	kfree(gate);
-err_free_fixed:
-	kfree(fixed);
-}
-CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup);
-
-
-
-/**
  * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1
  * PLL1 rate is calculated as follows
  * rate = (parent_rate * n * (k + 1) >> p) / (m + 1);
@@ -408,104 +351,6 @@
 	*p = calcp;
 }
 
-
-
-/**
- * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module
- *
- * This clock looks something like this
- *                               ________________________
- *  MII TX clock from PHY >-----|___________    _________|----> to GMAC core
- *  GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY
- *  Ext. 125MHz RGMII TX clk >--|__divider__/            |
- *                              |________________________|
- *
- * The external 125 MHz reference is optional, i.e. GMAC can use its
- * internal TX clock just fine. The A31 GMAC clock module does not have
- * the divider controls for the external reference.
- *
- * To keep it simple, let the GMAC use either the MII TX clock for MII mode,
- * and its internal TX clock for GMII and RGMII modes. The GMAC driver should
- * select the appropriate source and gate/ungate the output to the PHY.
- *
- * Only the GMAC should use this clock. Altering the clock so that it doesn't
- * match the GMAC's operation parameters will result in the GMAC not being
- * able to send traffic out. The GMAC driver should set the clock rate and
- * enable/disable this clock to configure the required state. The clock
- * driver then responds by auto-reparenting the clock.
- */
-
-#define SUN7I_A20_GMAC_GPIT	2
-#define SUN7I_A20_GMAC_MASK	0x3
-#define SUN7I_A20_GMAC_PARENTS	2
-
-static void __init sun7i_a20_gmac_clk_setup(struct device_node *node)
-{
-	struct clk *clk;
-	struct clk_mux *mux;
-	struct clk_gate *gate;
-	const char *clk_name = node->name;
-	const char *parents[SUN7I_A20_GMAC_PARENTS];
-	void *reg;
-
-	if (of_property_read_string(node, "clock-output-names", &clk_name))
-		return;
-
-	/* allocate mux and gate clock structs */
-	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-	if (!mux)
-		return;
-
-	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
-	if (!gate)
-		goto free_mux;
-
-	/* gmac clock requires exactly 2 parents */
-	parents[0] = of_clk_get_parent_name(node, 0);
-	parents[1] = of_clk_get_parent_name(node, 1);
-	if (!parents[0] || !parents[1])
-		goto free_gate;
-
-	reg = of_iomap(node, 0);
-	if (!reg)
-		goto free_gate;
-
-	/* set up gate and fixed rate properties */
-	gate->reg = reg;
-	gate->bit_idx = SUN7I_A20_GMAC_GPIT;
-	gate->lock = &clk_lock;
-	mux->reg = reg;
-	mux->mask = SUN7I_A20_GMAC_MASK;
-	mux->flags = CLK_MUX_INDEX_BIT;
-	mux->lock = &clk_lock;
-
-	clk = clk_register_composite(NULL, clk_name,
-			parents, SUN7I_A20_GMAC_PARENTS,
-			&mux->hw, &clk_mux_ops,
-			NULL, NULL,
-			&gate->hw, &clk_gate_ops,
-			0);
-
-	if (IS_ERR(clk))
-		goto iounmap_reg;
-
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
-
-	return;
-
-iounmap_reg:
-	iounmap(reg);
-free_gate:
-	kfree(gate);
-free_mux:
-	kfree(mux);
-}
-CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk",
-		sun7i_a20_gmac_clk_setup);
-
-
-
 /**
  * clk_sunxi_mmc_phase_control() - configures MMC clock phase control
  */
@@ -1009,6 +854,11 @@
 	.reset_mask = 0x03,
 };
 
+static const struct gates_data sun6i_a31_usb_gates_data __initconst = {
+	.mask = { BIT(18) | BIT(17) | BIT(16) | BIT(10) | BIT(9) | BIT(8) },
+	.reset_mask = BIT(2) | BIT(1) | BIT(0),
+};
+
 static void __init sunxi_gates_clk_setup(struct device_node *node,
 					 struct gates_data *data)
 {
@@ -1304,6 +1154,7 @@
 	{.compatible = "allwinner,sun6i-a31-apb2-gates-clk", .data = &sun6i_a31_apb2_gates_data,},
 	{.compatible = "allwinner,sun4i-a10-usb-clk", .data = &sun4i_a10_usb_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-usb-clk", .data = &sun5i_a13_usb_gates_data,},
+	{.compatible = "allwinner,sun6i-a31-usb-clk", .data = &sun6i_a31_usb_gates_data,},
 	{}
 };
 
@@ -1321,33 +1172,10 @@
 	}
 }
 
-/**
- * System clock protection
- *
- * By enabling these critical clocks, we prevent their accidental gating
- * by the framework
- */
-static void __init sunxi_clock_protect(void)
+static void __init sunxi_init_clocks(const char *clocks[], int nclocks)
 {
-	struct clk *clk;
+	unsigned int i;
 
-	/* memory bus clock - sun5i+ */
-	clk = clk_get(NULL, "mbus");
-	if (!IS_ERR(clk)) {
-		clk_prepare_enable(clk);
-		clk_put(clk);
-	}
-
-	/* DDR clock - sun4i+ */
-	clk = clk_get(NULL, "pll5_ddr");
-	if (!IS_ERR(clk)) {
-		clk_prepare_enable(clk);
-		clk_put(clk);
-	}
-}
-
-static void __init sunxi_init_clocks(struct device_node *np)
-{
 	/* Register factor clocks */
 	of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup);
 
@@ -1363,11 +1191,48 @@
 	/* Register gate clocks */
 	of_sunxi_table_clock_setup(clk_gates_match, sunxi_gates_clk_setup);
 
-	/* Enable core system clocks */
-	sunxi_clock_protect();
+	/* Protect the clocks that need to stay on */
+	for (i = 0; i < nclocks; i++) {
+		struct clk *clk = clk_get(NULL, clocks[i]);
+
+		if (!IS_ERR(clk))
+			clk_prepare_enable(clk);
+	}
 }
-CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sunxi_init_clocks);
-CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sunxi_init_clocks);
-CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sunxi_init_clocks);
-CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sunxi_init_clocks);
-CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sunxi_init_clocks);
+
+static const char *sun4i_a10_critical_clocks[] __initdata = {
+	"pll5_ddr",
+};
+
+static void __init sun4i_a10_init_clocks(struct device_node *node)
+{
+	sunxi_init_clocks(sun4i_a10_critical_clocks,
+			  ARRAY_SIZE(sun4i_a10_critical_clocks));
+}
+CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sun4i_a10_init_clocks);
+
+static const char *sun5i_critical_clocks[] __initdata = {
+	"mbus",
+	"pll5_ddr",
+};
+
+static void __init sun5i_init_clocks(struct device_node *node)
+{
+	sunxi_init_clocks(sun5i_critical_clocks,
+			  ARRAY_SIZE(sun5i_critical_clocks));
+}
+CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sun5i_init_clocks);
+CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sun5i_init_clocks);
+CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sun5i_init_clocks);
+
+static const char *sun6i_critical_clocks[] __initdata = {
+	"cpu",
+	"ahb1_sdram",
+};
+
+static void __init sun6i_init_clocks(struct device_node *node)
+{
+	sunxi_init_clocks(sun6i_critical_clocks,
+			  ARRAY_SIZE(sun6i_critical_clocks));
+}
+CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks);
diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile
index 4319d40..ed4d0aa 100644
--- a/drivers/clk/ti/Makefile
+++ b/drivers/clk/ti/Makefile
@@ -3,9 +3,11 @@
 clk-common				= dpll.o composite.o divider.o gate.o \
 					  fixed-factor.o mux.o apll.o
 obj-$(CONFIG_SOC_AM33XX)		+= $(clk-common) clk-33xx.o
+obj-$(CONFIG_ARCH_OMAP2)		+= $(clk-common) interface.o clk-2xxx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= $(clk-common) interface.o clk-3xxx.o
 obj-$(CONFIG_ARCH_OMAP4)		+= $(clk-common) clk-44xx.o
 obj-$(CONFIG_SOC_OMAP5)			+= $(clk-common) clk-54xx.o
-obj-$(CONFIG_SOC_DRA7XX)		+= $(clk-common) clk-7xx.o
+obj-$(CONFIG_SOC_DRA7XX)		+= $(clk-common) clk-7xx.o \
+					   clk-dra7-atl.o
 obj-$(CONFIG_SOC_AM43XX)		+= $(clk-common) clk-43xx.o
 endif
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index b986f61..5428c9c 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -221,3 +221,184 @@
 	kfree(init);
 }
 CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup);
+
+#define OMAP2_EN_APLL_LOCKED	0x3
+#define OMAP2_EN_APLL_STOPPED	0x0
+
+static int omap2_apll_is_enabled(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ad->enable_mask;
+
+	v >>= __ffs(ad->enable_mask);
+
+	return v == OMAP2_EN_APLL_LOCKED ? 1 : 0;
+}
+
+static unsigned long omap2_apll_recalc(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+
+	if (omap2_apll_is_enabled(hw))
+		return clk->fixed_rate;
+
+	return 0;
+}
+
+static int omap2_apll_enable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+	int i = 0;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+
+	while (1) {
+		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		if (v & ad->idlest_mask)
+			break;
+		if (i > MAX_APLL_WAIT_TRIES)
+			break;
+		i++;
+		udelay(1);
+	}
+
+	if (!(v & ad->idlest_mask)) {
+		pr_warn("%s failed to transition to locked\n",
+			__clk_get_name(clk->hw.clk));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void omap2_apll_disable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+}
+
+static struct clk_ops omap2_apll_ops = {
+	.enable		= &omap2_apll_enable,
+	.disable	= &omap2_apll_disable,
+	.is_enabled	= &omap2_apll_is_enabled,
+	.recalc_rate	= &omap2_apll_recalc,
+};
+
+static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val)
+{
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg);
+	v &= ~ad->autoidle_mask;
+	v |= val << __ffs(ad->autoidle_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->autoidle_reg);
+}
+
+#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP	0x3
+#define OMAP2_APLL_AUTOIDLE_DISABLE		0x0
+
+static void omap2_apll_allow_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP);
+}
+
+static void omap2_apll_deny_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE);
+}
+
+static struct clk_hw_omap_ops omap2_apll_hwops = {
+	.allow_idle	= &omap2_apll_allow_idle,
+	.deny_idle	= &omap2_apll_deny_idle,
+};
+
+static void __init of_omap2_apll_setup(struct device_node *node)
+{
+	struct dpll_data *ad = NULL;
+	struct clk_hw_omap *clk_hw = NULL;
+	struct clk_init_data *init = NULL;
+	struct clk *clk;
+	const char *parent_name;
+	u32 val;
+
+	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
+	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
+	init = kzalloc(sizeof(*init), GFP_KERNEL);
+
+	if (!ad || !clk_hw || !init)
+		goto cleanup;
+
+	clk_hw->dpll_data = ad;
+	clk_hw->hw.init = init;
+	init->ops = &omap2_apll_ops;
+	init->name = node->name;
+	clk_hw->ops = &omap2_apll_hwops;
+
+	init->num_parents = of_clk_get_parent_count(node);
+	if (init->num_parents != 1) {
+		pr_err("%s must have one parent\n", node->name);
+		goto cleanup;
+	}
+
+	parent_name = of_clk_get_parent_name(node, 0);
+	init->parent_names = &parent_name;
+
+	if (of_property_read_u32(node, "ti,clock-frequency", &val)) {
+		pr_err("%s missing clock-frequency\n", node->name);
+		goto cleanup;
+	}
+	clk_hw->fixed_rate = val;
+
+	if (of_property_read_u32(node, "ti,bit-shift", &val)) {
+		pr_err("%s missing bit-shift\n", node->name);
+		goto cleanup;
+	}
+
+	clk_hw->enable_bit = val;
+	ad->enable_mask = 0x3 << val;
+	ad->autoidle_mask = 0x3 << val;
+
+	if (of_property_read_u32(node, "ti,idlest-shift", &val)) {
+		pr_err("%s missing idlest-shift\n", node->name);
+		goto cleanup;
+	}
+
+	ad->idlest_mask = 1 << val;
+
+	ad->control_reg = ti_clk_get_reg_addr(node, 0);
+	ad->autoidle_reg = ti_clk_get_reg_addr(node, 1);
+	ad->idlest_reg = ti_clk_get_reg_addr(node, 2);
+
+	if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg)
+		goto cleanup;
+
+	clk = clk_register(NULL, &clk_hw->hw);
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+		kfree(init);
+		return;
+	}
+cleanup:
+	kfree(ad);
+	kfree(clk_hw);
+	kfree(init);
+}
+CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock",
+	       of_omap2_apll_setup);
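
omap2_apll_enable() above waits for the lock bit with a bounded udelay()
poll. One way to keep such loops honest is to derive the result from the
observed condition rather than the loop counter, roughly as below; the
lock-after-four-reads behaviour is simulated:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_TRIES 10

    static int reads;

    static bool read_locked(void)
    {
        return ++reads >= 4;        /* pretend the PLL locks on read 4 */
    }

    static int wait_locked(void)
    {
        for (int i = 0; i < MAX_TRIES; i++) {
            if (read_locked())
                return 0;
            /* udelay(1) in the real code */
        }
        return -16;                 /* -EBUSY */
    }

    int main(void)
    {
        printf("wait_locked() = %d after %d reads\n", wait_locked(), reads);
        return 0;
    }
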
diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c
new file mode 100644
index 0000000..c808ab3
--- /dev/null
+++ b/drivers/clk/ti/clk-2xxx.c
@@ -0,0 +1,256 @@
+/*
+ * OMAP2 Clock init
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc
+ *     Tero Kristo (t-kristo@ti.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/clk-provider.h>
+#include <linux/clk/ti.h>
+
+static struct ti_dt_clk omap2xxx_clks[] = {
+	DT_CLK(NULL, "func_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"),
+	DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"),
+	DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"),
+	DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"),
+	DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"),
+	DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"),
+	DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"),
+	DT_CLK(NULL, "osc_ck", "osc_ck"),
+	DT_CLK(NULL, "sys_ck", "sys_ck"),
+	DT_CLK(NULL, "alt_ck", "alt_ck"),
+	DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"),
+	DT_CLK(NULL, "dpll_ck", "dpll_ck"),
+	DT_CLK(NULL, "apll96_ck", "apll96_ck"),
+	DT_CLK(NULL, "apll54_ck", "apll54_ck"),
+	DT_CLK(NULL, "func_54m_ck", "func_54m_ck"),
+	DT_CLK(NULL, "core_ck", "core_ck"),
+	DT_CLK(NULL, "func_96m_ck", "func_96m_ck"),
+	DT_CLK(NULL, "func_48m_ck", "func_48m_ck"),
+	DT_CLK(NULL, "func_12m_ck", "func_12m_ck"),
+	DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"),
+	DT_CLK(NULL, "sys_clkout", "sys_clkout"),
+	DT_CLK(NULL, "emul_ck", "emul_ck"),
+	DT_CLK(NULL, "mpu_ck", "mpu_ck"),
+	DT_CLK(NULL, "dsp_fck", "dsp_fck"),
+	DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"),
+	DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"),
+	DT_CLK(NULL, "gfx_ick", "gfx_ick"),
+	DT_CLK("omapdss_dss", "ick", "dss_ick"),
+	DT_CLK(NULL, "dss_ick", "dss_ick"),
+	DT_CLK(NULL, "dss1_fck", "dss1_fck"),
+	DT_CLK(NULL, "dss2_fck", "dss2_fck"),
+	DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"),
+	DT_CLK(NULL, "core_l3_ck", "core_l3_ck"),
+	DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"),
+	DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"),
+	DT_CLK(NULL, "l4_ck", "l4_ck"),
+	DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"),
+	DT_CLK(NULL, "gpt1_ick", "gpt1_ick"),
+	DT_CLK(NULL, "gpt1_fck", "gpt1_fck"),
+	DT_CLK(NULL, "gpt2_ick", "gpt2_ick"),
+	DT_CLK(NULL, "gpt2_fck", "gpt2_fck"),
+	DT_CLK(NULL, "gpt3_ick", "gpt3_ick"),
+	DT_CLK(NULL, "gpt3_fck", "gpt3_fck"),
+	DT_CLK(NULL, "gpt4_ick", "gpt4_ick"),
+	DT_CLK(NULL, "gpt4_fck", "gpt4_fck"),
+	DT_CLK(NULL, "gpt5_ick", "gpt5_ick"),
+	DT_CLK(NULL, "gpt5_fck", "gpt5_fck"),
+	DT_CLK(NULL, "gpt6_ick", "gpt6_ick"),
+	DT_CLK(NULL, "gpt6_fck", "gpt6_fck"),
+	DT_CLK(NULL, "gpt7_ick", "gpt7_ick"),
+	DT_CLK(NULL, "gpt7_fck", "gpt7_fck"),
+	DT_CLK(NULL, "gpt8_ick", "gpt8_ick"),
+	DT_CLK(NULL, "gpt8_fck", "gpt8_fck"),
+	DT_CLK(NULL, "gpt9_ick", "gpt9_ick"),
+	DT_CLK(NULL, "gpt9_fck", "gpt9_fck"),
+	DT_CLK(NULL, "gpt10_ick", "gpt10_ick"),
+	DT_CLK(NULL, "gpt10_fck", "gpt10_fck"),
+	DT_CLK(NULL, "gpt11_ick", "gpt11_ick"),
+	DT_CLK(NULL, "gpt11_fck", "gpt11_fck"),
+	DT_CLK(NULL, "gpt12_ick", "gpt12_ick"),
+	DT_CLK(NULL, "gpt12_fck", "gpt12_fck"),
+	DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"),
+	DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"),
+	DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"),
+	DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"),
+	DT_CLK(NULL, "uart1_ick", "uart1_ick"),
+	DT_CLK(NULL, "uart1_fck", "uart1_fck"),
+	DT_CLK(NULL, "uart2_ick", "uart2_ick"),
+	DT_CLK(NULL, "uart2_fck", "uart2_fck"),
+	DT_CLK(NULL, "uart3_ick", "uart3_ick"),
+	DT_CLK(NULL, "uart3_fck", "uart3_fck"),
+	DT_CLK(NULL, "gpios_ick", "gpios_ick"),
+	DT_CLK(NULL, "gpios_fck", "gpios_fck"),
+	DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"),
+	DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"),
+	DT_CLK(NULL, "wdt1_ick", "wdt1_ick"),
+	DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"),
+	DT_CLK("omap24xxcam", "fck", "cam_fck"),
+	DT_CLK(NULL, "cam_fck", "cam_fck"),
+	DT_CLK("omap24xxcam", "ick", "cam_ick"),
+	DT_CLK(NULL, "cam_ick", "cam_ick"),
+	DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"),
+	DT_CLK(NULL, "wdt4_ick", "wdt4_ick"),
+	DT_CLK(NULL, "wdt4_fck", "wdt4_fck"),
+	DT_CLK(NULL, "mspro_ick", "mspro_ick"),
+	DT_CLK(NULL, "mspro_fck", "mspro_fck"),
+	DT_CLK(NULL, "fac_ick", "fac_ick"),
+	DT_CLK(NULL, "fac_fck", "fac_fck"),
+	DT_CLK("omap_hdq.0", "ick", "hdq_ick"),
+	DT_CLK(NULL, "hdq_ick", "hdq_ick"),
+	DT_CLK("omap_hdq.0", "fck", "hdq_fck"),
+	DT_CLK(NULL, "hdq_fck", "hdq_fck"),
+	DT_CLK("omap_i2c.1", "ick", "i2c1_ick"),
+	DT_CLK(NULL, "i2c1_ick", "i2c1_ick"),
+	DT_CLK("omap_i2c.2", "ick", "i2c2_ick"),
+	DT_CLK(NULL, "i2c2_ick", "i2c2_ick"),
+	DT_CLK(NULL, "gpmc_fck", "gpmc_fck"),
+	DT_CLK(NULL, "sdma_fck", "sdma_fck"),
+	DT_CLK(NULL, "sdma_ick", "sdma_ick"),
+	DT_CLK(NULL, "sdrc_ick", "sdrc_ick"),
+	DT_CLK(NULL, "des_ick", "des_ick"),
+	DT_CLK("omap-sham", "ick", "sha_ick"),
+	DT_CLK(NULL, "sha_ick", "sha_ick"),
+	DT_CLK("omap_rng", "ick", "rng_ick"),
+	DT_CLK(NULL, "rng_ick", "rng_ick"),
+	DT_CLK("omap-aes", "ick", "aes_ick"),
+	DT_CLK(NULL, "aes_ick", "aes_ick"),
+	DT_CLK(NULL, "pka_ick", "pka_ick"),
+	DT_CLK(NULL, "usb_fck", "usb_fck"),
+	DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "timer_sys_ck", "sys_ck"),
+	DT_CLK(NULL, "timer_ext_ck", "alt_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2420_clks[] = {
+	DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"),
+	DT_CLK(NULL, "sys_clkout2", "sys_clkout2"),
+	DT_CLK(NULL, "dsp_ick", "dsp_ick"),
+	DT_CLK(NULL, "iva1_ifck", "iva1_ifck"),
+	DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"),
+	DT_CLK(NULL, "wdt3_ick", "wdt3_ick"),
+	DT_CLK(NULL, "wdt3_fck", "wdt3_fck"),
+	DT_CLK("mmci-omap.0", "ick", "mmc_ick"),
+	DT_CLK(NULL, "mmc_ick", "mmc_ick"),
+	DT_CLK("mmci-omap.0", "fck", "mmc_fck"),
+	DT_CLK(NULL, "mmc_fck", "mmc_fck"),
+	DT_CLK(NULL, "eac_ick", "eac_ick"),
+	DT_CLK(NULL, "eac_fck", "eac_fck"),
+	DT_CLK(NULL, "i2c1_fck", "i2c1_fck"),
+	DT_CLK(NULL, "i2c2_fck", "i2c2_fck"),
+	DT_CLK(NULL, "vlynq_ick", "vlynq_ick"),
+	DT_CLK(NULL, "vlynq_fck", "vlynq_fck"),
+	DT_CLK("musb-hdrc", "fck", "osc_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2430_clks[] = {
+	DT_CLK("twl", "fck", "osc_ck"),
+	DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"),
+	DT_CLK(NULL, "mdm_ick", "mdm_ick"),
+	DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"),
+	DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"),
+	DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"),
+	DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"),
+	DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"),
+	DT_CLK(NULL, "icr_ick", "icr_ick"),
+	DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"),
+	DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"),
+	DT_CLK("musb-omap2430", "ick", "usbhs_ick"),
+	DT_CLK(NULL, "usbhs_ick", "usbhs_ick"),
+	DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"),
+	DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"),
+	DT_CLK(NULL, "gpio5_ick", "gpio5_ick"),
+	DT_CLK(NULL, "gpio5_fck", "gpio5_fck"),
+	DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"),
+	DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"),
+	DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"),
+	DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"),
+	DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"),
+	{ .node_name = NULL },
+};
+
+static const char *enable_init_clks[] = {
+	"apll96_ck",
+	"apll54_ck",
+	"sync_32k_ick",
+	"omapctrl_ick",
+	"gpmc_fck",
+	"sdrc_ick",
+};
+
+enum {
+	OMAP2_SOC_OMAP2420,
+	OMAP2_SOC_OMAP2430,
+};
+
+static int __init omap2xxx_dt_clk_init(int soc_type)
+{
+	ti_dt_clocks_register(omap2xxx_clks);
+
+	if (soc_type == OMAP2_SOC_OMAP2420)
+		ti_dt_clocks_register(omap2420_clks);
+	else
+		ti_dt_clocks_register(omap2430_clks);
+
+	omap2xxx_clkt_vps_init();
+
+	omap2_clk_disable_autoidle_all();
+
+	omap2_clk_enable_init_clocks(enable_init_clks,
+				     ARRAY_SIZE(enable_init_clks));
+
+	pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n",
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10,
+		(clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000));
+
+	return 0;
+}
+
+int __init omap2420_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420);
+}
+
+int __init omap2430_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430);
+}
diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c
index 08f3d1b..5e18399 100644
--- a/drivers/clk/ti/clk-54xx.c
+++ b/drivers/clk/ti/clk-54xx.c
@@ -240,6 +240,12 @@
 	if (rc)
 		pr_err("%s: failed to configure ABE DPLL!\n", __func__);
 
+	abe_dpll = clk_get_sys(NULL, "dpll_abe_m2x2_ck");
+	if (!rc)
+		rc = clk_set_rate(abe_dpll, OMAP5_DPLL_ABE_DEFFREQ * 2);
+	if (rc)
+		pr_err("%s: failed to configure ABE m2x2 DPLL!\n", __func__);
+
 	usb_dpll = clk_get_sys(NULL, "dpll_usb_ck");
 	rc = clk_set_rate(usb_dpll, OMAP5_DPLL_USB_DEFFREQ);
 	if (rc)
diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
index f7e4073..e158133 100644
--- a/drivers/clk/ti/clk-7xx.c
+++ b/drivers/clk/ti/clk-7xx.c
@@ -24,7 +24,7 @@
 	DT_CLK(NULL, "atl_clkin0_ck", "atl_clkin0_ck"),
 	DT_CLK(NULL, "atl_clkin1_ck", "atl_clkin1_ck"),
 	DT_CLK(NULL, "atl_clkin2_ck", "atl_clkin2_ck"),
-	DT_CLK(NULL, "atlclkin3_ck", "atlclkin3_ck"),
+	DT_CLK(NULL, "atl_clkin3_ck", "atl_clkin3_ck"),
 	DT_CLK(NULL, "hdmi_clkin_ck", "hdmi_clkin_ck"),
 	DT_CLK(NULL, "mlb_clkin_ck", "mlb_clkin_ck"),
 	DT_CLK(NULL, "mlbp_clkin_ck", "mlbp_clkin_ck"),
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
new file mode 100644
index 0000000..4a65b41
--- /dev/null
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -0,0 +1,312 @@
+/*
+ * DRA7 ATL (Audio Tracking Logic) clock driver
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc.
+ *
+ * Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#define DRA7_ATL_INSTANCES	4
+
+#define DRA7_ATL_PPMR_REG(id)		(0x200 + (id * 0x80))
+#define DRA7_ATL_BBSR_REG(id)		(0x204 + (id * 0x80))
+#define DRA7_ATL_ATLCR_REG(id)		(0x208 + (id * 0x80))
+#define DRA7_ATL_SWEN_REG(id)		(0x210 + (id * 0x80))
+#define DRA7_ATL_BWSMUX_REG(id)		(0x214 + (id * 0x80))
+#define DRA7_ATL_AWSMUX_REG(id)		(0x218 + (id * 0x80))
+#define DRA7_ATL_PCLKMUX_REG(id)	(0x21c + (id * 0x80))
+
+#define DRA7_ATL_SWEN			BIT(0)
+#define DRA7_ATL_DIVIDER_MASK		(0x1f)
+#define DRA7_ATL_PCLKMUX		BIT(0)
+struct dra7_atl_clock_info;
+
+struct dra7_atl_desc {
+	struct clk *clk;
+	struct clk_hw hw;
+	struct dra7_atl_clock_info *cinfo;
+	int id;
+
+	bool probed;		/* the driver for the IP has been loaded */
+	bool valid;		/* configured */
+	bool enabled;
+	u32 bws;		/* Baseband Word Select Mux */
+	u32 aws;		/* Audio Word Select Mux */
+	u32 divider;		/* Cached divider value */
+};
+
+struct dra7_atl_clock_info {
+	struct device *dev;
+	void __iomem *iobase;
+
+	struct dra7_atl_desc *cdesc;
+};
+
+#define to_atl_desc(_hw)	container_of(_hw, struct dra7_atl_desc, hw)
+
+static inline void atl_write(struct dra7_atl_clock_info *cinfo, u32 reg,
+			     u32 val)
+{
+	__raw_writel(val, cinfo->iobase + reg);
+}
+
+static inline int atl_read(struct dra7_atl_clock_info *cinfo, u32 reg)
+{
+	return __raw_readl(cinfo->iobase + reg);
+}
+
+static int atl_clk_enable(struct clk_hw *hw)
+{
+	struct dra7_atl_desc *cdesc = to_atl_desc(hw);
+
+	if (!cdesc->probed)
+		goto out;
+
+	if (unlikely(!cdesc->valid))
+		dev_warn(cdesc->cinfo->dev, "atl%d has not been configured\n",
+			 cdesc->id);
+	pm_runtime_get_sync(cdesc->cinfo->dev);
+
+	atl_write(cdesc->cinfo, DRA7_ATL_ATLCR_REG(cdesc->id),
+		  cdesc->divider - 1);
+	atl_write(cdesc->cinfo, DRA7_ATL_SWEN_REG(cdesc->id), DRA7_ATL_SWEN);
+
+out:
+	cdesc->enabled = true;
+
+	return 0;
+}
+
+static void atl_clk_disable(struct clk_hw *hw)
+{
+	struct dra7_atl_desc *cdesc = to_atl_desc(hw);
+
+	if (!cdesc->probed)
+		goto out;
+
+	atl_write(cdesc->cinfo, DRA7_ATL_SWEN_REG(cdesc->id), 0);
+	pm_runtime_put_sync(cdesc->cinfo->dev);
+
+out:
+	cdesc->enabled = false;
+}
+
+static int atl_clk_is_enabled(struct clk_hw *hw)
+{
+	struct dra7_atl_desc *cdesc = to_atl_desc(hw);
+
+	return cdesc->enabled;
+}
+
+static unsigned long atl_clk_recalc_rate(struct clk_hw *hw,
+					 unsigned long parent_rate)
+{
+	struct dra7_atl_desc *cdesc = to_atl_desc(hw);
+
+	return parent_rate / cdesc->divider;
+}
+
+static long atl_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long *parent_rate)
+{
+	unsigned divider;
+
+	divider = (*parent_rate + rate / 2) / rate;
+	if (divider > DRA7_ATL_DIVIDER_MASK + 1)
+		divider = DRA7_ATL_DIVIDER_MASK + 1;
+
+	return *parent_rate / divider;
+}
+
+static int atl_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			    unsigned long parent_rate)
+{
+	struct dra7_atl_desc *cdesc = to_atl_desc(hw);
+	u32 divider;
+
+	divider = ((parent_rate + rate / 2) / rate) - 1;
+	if (divider > DRA7_ATL_DIVIDER_MASK)
+		divider = DRA7_ATL_DIVIDER_MASK;
+
+	cdesc->divider = divider + 1;
+
+	return 0;
+}
+
+const struct clk_ops atl_clk_ops = {
+	.enable		= atl_clk_enable,
+	.disable	= atl_clk_disable,
+	.is_enabled	= atl_clk_is_enabled,
+	.recalc_rate	= atl_clk_recalc_rate,
+	.round_rate	= atl_clk_round_rate,
+	.set_rate	= atl_clk_set_rate,
+};
+
+static void __init of_dra7_atl_clock_setup(struct device_node *node)
+{
+	struct dra7_atl_desc *clk_hw = NULL;
+	struct clk_init_data init = { 0 };
+	const char **parent_names = NULL;
+	struct clk *clk;
+
+	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
+	if (!clk_hw) {
+		pr_err("%s: could not allocate dra7_atl_desc\n", __func__);
+		return;
+	}
+
+	clk_hw->hw.init = &init;
+	clk_hw->divider = 1;
+	init.name = node->name;
+	init.ops = &atl_clk_ops;
+	init.flags = CLK_IGNORE_UNUSED;
+	init.num_parents = of_clk_get_parent_count(node);
+
+	if (init.num_parents != 1) {
+		pr_err("%s: atl clock %s must have 1 parent\n", __func__,
+		       node->name);
+		goto cleanup;
+	}
+
+	parent_names = kzalloc(sizeof(char *), GFP_KERNEL);
+
+	if (!parent_names)
+		goto cleanup;
+
+	parent_names[0] = of_clk_get_parent_name(node, 0);
+
+	init.parent_names = parent_names;
+
+	clk = clk_register(NULL, &clk_hw->hw);
+
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+		return;
+	}
+cleanup:
+	kfree(parent_names);
+	kfree(clk_hw);
+}
+CLK_OF_DECLARE(dra7_atl_clock, "ti,dra7-atl-clock", of_dra7_atl_clock_setup);
+
+static int of_dra7_atl_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct dra7_atl_clock_info *cinfo;
+	int i;
+	int ret = 0;
+
+	if (!node)
+		return -ENODEV;
+
+	cinfo = devm_kzalloc(&pdev->dev, sizeof(*cinfo), GFP_KERNEL);
+	if (!cinfo)
+		return -ENOMEM;
+
+	cinfo->iobase = of_iomap(node, 0);
+	cinfo->dev = &pdev->dev;
+	pm_runtime_enable(cinfo->dev);
+
+	pm_runtime_get_sync(cinfo->dev);
+	atl_write(cinfo, DRA7_ATL_PCLKMUX_REG(0), DRA7_ATL_PCLKMUX);
+
+	for (i = 0; i < DRA7_ATL_INSTANCES; i++) {
+		struct device_node *cfg_node;
+		char prop[5];
+		struct dra7_atl_desc *cdesc;
+		struct of_phandle_args clkspec;
+		struct clk *clk;
+		int rc;
+
+		rc = of_parse_phandle_with_args(node, "ti,provided-clocks",
+						NULL, i, &clkspec);
+
+		if (rc) {
+			pr_err("%s: failed to lookup atl clock %d\n", __func__,
+			       i);
+			return -EINVAL;
+		}
+
+		clk = of_clk_get_from_provider(&clkspec);
+
+		cdesc = to_atl_desc(__clk_get_hw(clk));
+		cdesc->cinfo = cinfo;
+		cdesc->id = i;
+
+		/* Get configuration for the ATL instances */
+		snprintf(prop, sizeof(prop), "atl%u", i);
+		cfg_node = of_find_node_by_name(node, prop);
+		if (cfg_node) {
+			ret = of_property_read_u32(cfg_node, "bws",
+						   &cdesc->bws);
+			ret |= of_property_read_u32(cfg_node, "aws",
+						    &cdesc->aws);
+			if (!ret) {
+				cdesc->valid = true;
+				atl_write(cinfo, DRA7_ATL_BWSMUX_REG(i),
+					  cdesc->bws);
+				atl_write(cinfo, DRA7_ATL_AWSMUX_REG(i),
+					  cdesc->aws);
+			}
+		}
+
+		cdesc->probed = true;
+		/*
+		 * Enable the clock if it was requested prior to loading the
+		 * hw driver
+		 */
+		if (cdesc->enabled)
+			atl_clk_enable(__clk_get_hw(clk));
+	}
+	pm_runtime_put_sync(cinfo->dev);
+
+	return ret;
+}
+
+static int of_dra7_atl_clk_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static struct of_device_id of_dra7_atl_clk_match_tbl[] = {
+	{ .compatible = "ti,dra7-atl", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_dra7_atl_clk_match_tbl);
+
+static struct platform_driver dra7_atl_clk_driver = {
+	.driver = {
+		.name = "dra7-atl",
+		.owner = THIS_MODULE,
+		.of_match_table = of_dra7_atl_clk_match_tbl,
+	},
+	.probe = of_dra7_atl_clk_probe,
+	.remove = of_dra7_atl_clk_remove,
+};
+
+module_platform_driver(dra7_atl_clk_driver);
+
+MODULE_DESCRIPTION("Clock driver for DRA7 Audio Tracking Logic");
+MODULE_ALIAS("platform:dra7-atl-clock");
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_LICENSE("GPL v2");
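
A note on the divider selection in atl_clk_round_rate() and atl_clk_set_rate() above: the divider is chosen by rounding parent_rate/rate to the nearest integer and clamping it to what fits the 5-bit ATLCR divider field. A stand-alone sketch of the same rounding, with assumed rates that are illustrative only:

#include <stdio.h>

#define DRA7_ATL_DIVIDER_MASK	0x1f

/* Mirror of the rounding done in atl_clk_round_rate(), userspace version */
static unsigned long atl_round(unsigned long parent_rate, unsigned long rate)
{
	unsigned int divider = (parent_rate + rate / 2) / rate;	/* nearest */

	if (divider > DRA7_ATL_DIVIDER_MASK + 1)
		divider = DRA7_ATL_DIVIDER_MASK + 1;	/* 5-bit field + 1 */

	return parent_rate / divider;
}

int main(void)
{
	/* hypothetical 49.152 MHz functional clock, 6.144 MHz target */
	printf("%lu\n", atl_round(49152000, 6144000));	/* divider 8 -> 6144000 */
	return 0;
}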
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 7e498a4..abd956d 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -25,8 +25,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#define DPLL_HAS_AUTOIDLE	0x1
-
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \
 	defined(CONFIG_SOC_DRA7XX)
 static const struct clk_ops dpll_m4xen_ck_ops = {
@@ -37,21 +35,18 @@
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 	.get_parent	= &omap2_init_dpll_parent,
 };
+#else
+static const struct clk_ops dpll_m4xen_ck_ops = {};
 #endif
 
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \
+	defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \
+	defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX)
 static const struct clk_ops dpll_core_ck_ops = {
 	.recalc_rate	= &omap3_dpll_recalc,
 	.get_parent	= &omap2_init_dpll_parent,
 };
 
-#ifdef CONFIG_ARCH_OMAP3
-static const struct clk_ops omap3_dpll_core_ck_ops = {
-	.get_parent	= &omap2_init_dpll_parent,
-	.recalc_rate	= &omap3_dpll_recalc,
-	.round_rate	= &omap2_dpll_round_rate,
-};
-#endif
-
 static const struct clk_ops dpll_ck_ops = {
 	.enable		= &omap3_noncore_dpll_enable,
 	.disable	= &omap3_noncore_dpll_disable,
@@ -67,6 +62,33 @@
 	.round_rate	= &omap2_dpll_round_rate,
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 };
+#else
+static const struct clk_ops dpll_core_ck_ops = {};
+static const struct clk_ops dpll_ck_ops = {};
+static const struct clk_ops dpll_no_gate_ck_ops = {};
+const struct clk_hw_omap_ops clkhwops_omap3_dpll = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2
+static const struct clk_ops omap2_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap2_dpllcore_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+	.set_rate	= &omap2_reprogram_dpllcore,
+};
+#else
+static const struct clk_ops omap2_dpll_core_ck_ops = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP3
+static const struct clk_ops omap3_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap3_dpll_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+};
+#else
+static const struct clk_ops omap3_dpll_core_ck_ops = {};
+#endif
 
 #ifdef CONFIG_ARCH_OMAP3
 static const struct clk_ops omap3_dpll_ck_ops = {
@@ -193,14 +215,12 @@
  * @node: device node containing the DPLL info
  * @ops: ops for the DPLL
  * @ddt: DPLL data template to use
- * @init_flags: flags for controlling init types
  *
  * Initializes a DPLL clock from device tree data.
  */
 static void __init of_ti_dpll_setup(struct device_node *node,
 				    const struct clk_ops *ops,
-				    const struct dpll_data *ddt,
-				    u8 init_flags)
+				    const struct dpll_data *ddt)
 {
 	struct clk_hw_omap *clk_hw = NULL;
 	struct clk_init_data *init = NULL;
@@ -241,13 +261,30 @@
 	init->parent_names = parent_names;
 
 	dd->control_reg = ti_clk_get_reg_addr(node, 0);
-	dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
-	dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
 
-	if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg)
+	/*
+	 * Special case for OMAP2 DPLLs: the register order differs because
+	 * there is no idlest_reg, and the clkhwops differ as well. This is
+	 * detected from the missing idlest_mask.
+	 */
+	if (!dd->idlest_mask) {
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1);
+#ifdef CONFIG_ARCH_OMAP2
+		clk_hw->ops = &clkhwops_omap2xxx_dpll;
+		omap2xxx_clkt_dpllcore_init(&clk_hw->hw);
+#endif
+	} else {
+		dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
+		if (!dd->idlest_reg)
+			goto cleanup;
+
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
+	}
+
+	if (!dd->control_reg || !dd->mult_div1_reg)
 		goto cleanup;
 
-	if (init_flags & DPLL_HAS_AUTOIDLE) {
+	if (dd->autoidle_mask) {
 		dd->autoidle_reg = ti_clk_get_reg_addr(node, 3);
 		if (!dd->autoidle_reg)
 			goto cleanup;
@@ -310,7 +347,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock",
 	       of_ti_omap3_dpll_setup);
@@ -329,7 +366,7 @@
 		.freqsel_mask = 0xf0,
 	};
 
-	of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap3_core_dpll_clock, "ti,omap3-dpll-core-clock",
 	       of_ti_omap3_core_dpll_setup);
@@ -349,7 +386,7 @@
 		.modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap3_per_dpll_clock, "ti,omap3-dpll-per-clock",
 	       of_ti_omap3_per_dpll_setup);
@@ -371,7 +408,7 @@
 		.modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap3_per_jtype_dpll_clock, "ti,omap3-dpll-per-j-type-clock",
 	       of_ti_omap3_per_jtype_dpll_setup);
@@ -391,11 +428,32 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &dpll_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap4_dpll_clock, "ti,omap4-dpll-clock",
 	       of_ti_omap4_dpll_setup);
 
+static void __init of_ti_omap5_mpu_dpll_setup(struct device_node *node)
+{
+	const struct dpll_data dd = {
+		.idlest_mask = 0x1,
+		.enable_mask = 0x7,
+		.autoidle_mask = 0x7,
+		.mult_mask = 0x7ff << 8,
+		.div1_mask = 0x7f,
+		.max_multiplier = 2047,
+		.max_divider = 128,
+		.dcc_mask = BIT(22),
+		.dcc_rate = 1400000000, /* DCC beyond 1.4GHz */
+		.min_divider = 1,
+		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
+	};
+
+	of_ti_dpll_setup(node, &dpll_ck_ops, &dd);
+}
+CLK_OF_DECLARE(of_ti_omap5_mpu_dpll_clock, "ti,omap5-mpu-dpll-clock",
+	       of_ti_omap5_mpu_dpll_setup);
+
 static void __init of_ti_omap4_core_dpll_setup(struct device_node *node)
 {
 	const struct dpll_data dd = {
@@ -410,7 +468,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap4_core_dpll_clock, "ti,omap4-dpll-core-clock",
 	       of_ti_omap4_core_dpll_setup);
@@ -433,7 +491,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap4_m4xen_dpll_clock, "ti,omap4-dpll-m4xen-clock",
 	       of_ti_omap4_m4xen_dpll_setup);
@@ -454,7 +512,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE);
+	of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_omap4_jtype_dpll_clock, "ti,omap4-dpll-j-type-clock",
 	       of_ti_omap4_jtype_dpll_setup);
@@ -465,7 +523,6 @@
 	const struct dpll_data dd = {
 		.idlest_mask = 0x1,
 		.enable_mask = 0x7,
-		.autoidle_mask = 0x7,
 		.mult_mask = 0x7ff << 8,
 		.div1_mask = 0x7f,
 		.max_multiplier = 2047,
@@ -474,7 +531,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0);
+	of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_am3_no_gate_dpll_clock, "ti,am3-dpll-no-gate-clock",
 	       of_ti_am3_no_gate_dpll_setup);
@@ -484,7 +541,6 @@
 	const struct dpll_data dd = {
 		.idlest_mask = 0x1,
 		.enable_mask = 0x7,
-		.autoidle_mask = 0x7,
 		.mult_mask = 0x7ff << 8,
 		.div1_mask = 0x7f,
 		.max_multiplier = 4095,
@@ -494,7 +550,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0);
+	of_ti_dpll_setup(node, &dpll_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_am3_jtype_dpll_clock, "ti,am3-dpll-j-type-clock",
 	       of_ti_am3_jtype_dpll_setup);
@@ -504,7 +560,6 @@
 	const struct dpll_data dd = {
 		.idlest_mask = 0x1,
 		.enable_mask = 0x7,
-		.autoidle_mask = 0x7,
 		.mult_mask = 0x7ff << 8,
 		.div1_mask = 0x7f,
 		.max_multiplier = 2047,
@@ -514,7 +569,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0);
+	of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_am3_no_gate_jtype_dpll_clock,
 	       "ti,am3-dpll-no-gate-j-type-clock",
@@ -525,7 +580,6 @@
 	const struct dpll_data dd = {
 		.idlest_mask = 0x1,
 		.enable_mask = 0x7,
-		.autoidle_mask = 0x7,
 		.mult_mask = 0x7ff << 8,
 		.div1_mask = 0x7f,
 		.max_multiplier = 2047,
@@ -534,7 +588,7 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0);
+	of_ti_dpll_setup(node, &dpll_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_am3_dpll_clock, "ti,am3-dpll-clock", of_ti_am3_dpll_setup);
 
@@ -543,7 +597,6 @@
 	const struct dpll_data dd = {
 		.idlest_mask = 0x1,
 		.enable_mask = 0x7,
-		.autoidle_mask = 0x7,
 		.mult_mask = 0x7ff << 8,
 		.div1_mask = 0x7f,
 		.max_multiplier = 2047,
@@ -552,7 +605,22 @@
 		.modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED),
 	};
 
-	of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, 0);
+	of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd);
 }
 CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock",
 	       of_ti_am3_core_dpll_setup);
+
+static void __init of_ti_omap2_core_dpll_setup(struct device_node *node)
+{
+	const struct dpll_data dd = {
+		.enable_mask = 0x3,
+		.mult_mask = 0x3ff << 12,
+		.div1_mask = 0xf << 8,
+		.max_divider = 16,
+		.min_divider = 1,
+	};
+
+	of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd);
+}
+CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock",
+	       of_ti_omap2_core_dpll_setup);
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index 5873481..b326d27 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -185,7 +185,7 @@
 CLK_OF_DECLARE(ti_composite_no_wait_gate_clk, "ti,composite-no-wait-gate-clock",
 	       of_ti_composite_no_wait_gate_clk_setup);
 
-#ifdef CONFIG_ARCH_OMAP3
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 static void __init of_ti_composite_interface_clk_setup(struct device_node *node)
 {
 	_of_ti_composite_gate_clk_setup(node, &clkhwops_iclk_wait);
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index 320a2b1..9c3e8c4 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -94,6 +94,7 @@
 CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock",
 	       of_ti_no_wait_interface_clk_setup);
 
+#ifdef CONFIG_ARCH_OMAP3
 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node)
 {
 	_of_ti_interface_clk_setup(node,
@@ -123,3 +124,13 @@
 }
 CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock",
 	       of_ti_am35xx_interface_clk_setup);
+#endif
+
+#ifdef CONFIG_SOC_OMAP2430
+static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node)
+{
+	_of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait);
+}
+CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock",
+	       of_ti_omap2430_interface_clk_setup);
+#endif
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 719f6fb..74f5788 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -73,12 +73,10 @@
 		return index;
 
 	new_lpcr = old_lpcr;
-	new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */
-
-	/* exit powersave upon external interrupt, but not decrementer
-	 * interrupt.
+	/* Do not exit powersave upon decrementer as we've set up the timer
+	 * offload.
 	 */
-	new_lpcr |= LPCR_PECE0;
+	new_lpcr &= ~LPCR_PECE1;
 
 	mtspr(SPRN_LPCR, new_lpcr);
 	power7_sleep();
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 136d6a2..9634f20 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -187,8 +187,11 @@
 
 	t1 = ktime_get();
 	local_irq_enable();
-	while (!need_resched())
-		cpu_relax();
+	if (!current_set_polling_and_test()) {
+		while (!need_resched())
+			cpu_relax();
+	}
+	current_clr_polling();
 
 	t2 = ktime_get();
 	diff = ktime_to_us(ktime_sub(t2, t1));
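
The poll_idle() change above follows the usual polling-idle handshake: the idle CPU first advertises that it is polling (so a waker may skip the resched IPI) and then re-tests for pending work to close the race with a wakeup that arrived in between. A rough userspace sketch of that ordering using C11 atomics (illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_bool polling, need_resched_flag;

/* analogue of current_set_polling_and_test(): advertise, then re-check */
static int set_polling_and_test(void)
{
	atomic_store(&polling, 1);
	return atomic_load(&need_resched_flag);
}

int main(void)
{
	/* waker side: post work; a real waker would skip its IPI when
	 * it observes the polling flag already set */
	atomic_store(&need_resched_flag, 1);

	if (!set_polling_and_test())
		puts("no work yet: spin in the poll loop");
	else
		puts("work already pending: skip the poll loop");

	atomic_store(&polling, 0);	/* current_clr_polling() analogue */
	return 0;
}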
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index f066fa2..02f177a 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -313,7 +313,7 @@
 
 config CRYPTO_DEV_NX
 	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-	depends on PPC64 && IBMVIO
+	depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
 	default n
 	help
 	  Support for Power7+ in-Nest cryptographic acceleration.
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d9c9cb4..2ebc907 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2614,7 +2614,7 @@
 
 		desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
 						&of_flags);
-		if (!IS_ERR(desc))
+		if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER))
 			break;
 	}
 
diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig
index 71b9f9a..bc60dec 100644
--- a/drivers/hsi/clients/Kconfig
+++ b/drivers/hsi/clients/Kconfig
@@ -15,7 +15,7 @@
 
 config SSI_PROTOCOL
 	tristate "SSI protocol"
-	depends on HSI && PHONET && (OMAP_SSI=y || OMAP_SSI=m)
+	depends on HSI && PHONET && OMAP_SSI
 	help
 	If you say Y here, you will enable the SSI protocol aka McSAAB.
 
diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c
index b8693f0..29aea0b 100644
--- a/drivers/hsi/controllers/omap_ssi_port.c
+++ b/drivers/hsi/controllers/omap_ssi_port.c
@@ -1116,8 +1116,7 @@
 
 	dev_dbg(&pd->dev, "init ssi port...\n");
 
-	err = ref_module(THIS_MODULE, ssi->owner);
-	if (err) {
-		dev_err(&pd->dev, "could not increment parent module refcount (err=%d)\n",
-			err);
+	if (!try_module_get(ssi->owner)) {
+		dev_err(&pd->dev, "could not increment parent module refcount\n");
 		return -ENODEV;
@@ -1254,6 +1253,7 @@
 
 	omap_ssi->port[omap_port->port_id] = NULL;
 	platform_set_drvdata(pd, NULL);
+	module_put(ssi->owner);
 	pm_runtime_disable(&pd->dev);
 
 	return 0;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0034316..08531a1 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1124,6 +1124,16 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called sht21.
 
+config SENSORS_SHTC1
+	tristate "Sensirion humidity and temperature sensors. SHTC1 and compat."
+	depends on I2C
+	help
+	  If you say yes here you get support for the Sensirion SHTC1 and SHTW1
+	  humidity and temperature sensors.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called shtc1.
+
 config SENSORS_S3C
 	tristate "Samsung built-in ADC"
 	depends on S3C_ADC
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 11798ad..3dc0f02 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -126,6 +126,7 @@
 obj-$(CONFIG_SENSORS_SCH5636)	+= sch5636.o
 obj-$(CONFIG_SENSORS_SHT15)	+= sht15.o
 obj-$(CONFIG_SENSORS_SHT21)	+= sht21.o
+obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
 obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
 obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index 6edce42..2ae8a30 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -45,30 +45,6 @@
 
 static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END };
 
-static int atxp1_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id);
-static int atxp1_remove(struct i2c_client *client);
-static struct atxp1_data *atxp1_update_device(struct device *dev);
-static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info);
-
-static const struct i2c_device_id atxp1_id[] = {
-	{ "atxp1", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, atxp1_id);
-
-static struct i2c_driver atxp1_driver = {
-	.class		= I2C_CLASS_HWMON,
-	.driver = {
-		.name	= "atxp1",
-	},
-	.probe		= atxp1_probe,
-	.remove		= atxp1_remove,
-	.id_table	= atxp1_id,
-	.detect		= atxp1_detect,
-	.address_list	= normal_i2c,
-};
-
 struct atxp1_data {
 	struct device *hwmon_dev;
 	struct mutex update_lock;
@@ -386,4 +362,22 @@
 	return 0;
 };
 
+static const struct i2c_device_id atxp1_id[] = {
+	{ "atxp1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, atxp1_id);
+
+static struct i2c_driver atxp1_driver = {
+	.class		= I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "atxp1",
+	},
+	.probe		= atxp1_probe,
+	.remove		= atxp1_remove,
+	.id_table	= atxp1_id,
+	.detect		= atxp1_detect,
+	.address_list	= normal_i2c,
+};
+
 module_i2c_driver(atxp1_driver);
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index 93d26e8..bfd3f3e 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -148,7 +148,8 @@
 
 	switch (reg) {
 	case INA2XX_SHUNT_VOLTAGE:
-		val = DIV_ROUND_CLOSEST(data->regs[reg],
+		/* signed register */
+		val = DIV_ROUND_CLOSEST((s16)data->regs[reg],
 					data->config->shunt_div);
 		break;
 	case INA2XX_BUS_VOLTAGE:
@@ -160,8 +161,8 @@
 		val = data->regs[reg] * data->config->power_lsb;
 		break;
 	case INA2XX_CURRENT:
-		/* LSB=1mA (selected). Is in mA */
-		val = data->regs[reg];
+		/* signed register, LSB=1mA (selected), in mA */
+		val = (s16)data->regs[reg];
 		break;
 	default:
 		/* programmer goofed */
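
For clarity on why the (s16) casts above are needed: the register cache holds raw unsigned 16-bit values, and the shunt-voltage and current registers are two's-complement, so a negative reading interpreted unsigned becomes a huge positive number. A stand-alone illustration with an arbitrary register value:

#include <stdio.h>

int main(void)
{
	unsigned short reg = 0xfff8;	/* arbitrary raw register value */

	printf("without cast: %u\n", (unsigned int)reg);	/* 65528 */
	printf("with cast:    %d\n", (short)reg);		/* -8 */
	return 0;
}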
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index bed4af35..b0129a5 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -5,7 +5,7 @@
  * Copyright (c) 2002, 2003  Philip Pokorny <ppokorny@penguincomputing.com>
  * Copyright (c) 2003        Margit Schubert-While <margitsw@t-online.de>
  * Copyright (c) 2004        Justin Thiessen <jthiessen@penguincomputing.com>
- * Copyright (C) 2007--2009  Jean Delvare <jdelvare@suse.de>
+ * Copyright (C) 2007--2014  Jean Delvare <jdelvare@suse.de>
  *
  * Chip details at	      <http://www.national.com/ds/LM/LM85.pdf>
  *
@@ -39,7 +39,7 @@
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 enum chips {
-	any_chip, lm85b, lm85c,
+	lm85,
 	adm1027, adt7463, adt7468,
 	emc6d100, emc6d102, emc6d103, emc6d103s
 };
@@ -75,9 +75,6 @@
 #define LM85_COMPANY_NATIONAL		0x01
 #define LM85_COMPANY_ANALOG_DEV		0x41
 #define LM85_COMPANY_SMSC		0x5c
-#define LM85_VERSTEP_VMASK              0xf0
-#define LM85_VERSTEP_GENERIC		0x60
-#define LM85_VERSTEP_GENERIC2		0x70
 #define LM85_VERSTEP_LM85C		0x60
 #define LM85_VERSTEP_LM85B		0x62
 #define LM85_VERSTEP_LM96000_1		0x68
@@ -351,9 +348,9 @@
 	{ "adm1027", adm1027 },
 	{ "adt7463", adt7463 },
 	{ "adt7468", adt7468 },
-	{ "lm85", any_chip },
-	{ "lm85b", lm85b },
-	{ "lm85c", lm85c },
+	{ "lm85", lm85 },
+	{ "lm85b", lm85 },
+	{ "lm85c", lm85 },
 	{ "emc6d100", emc6d100 },
 	{ "emc6d101", emc6d100 },
 	{ "emc6d102", emc6d102 },
@@ -1281,7 +1278,7 @@
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int address = client->addr;
-	const char *type_name;
+	const char *type_name = NULL;
 	int company, verstep;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
@@ -1297,16 +1294,6 @@
 		"Detecting device at 0x%02x with COMPANY: 0x%02x and VERSTEP: 0x%02x\n",
 		address, company, verstep);
 
-	/* All supported chips have the version in common */
-	if ((verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC &&
-	    (verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC2) {
-		dev_dbg(&adapter->dev,
-			"Autodetection failed: unsupported version\n");
-		return -ENODEV;
-	}
-	type_name = "lm85";
-
-	/* Now, refine the detection */
 	if (company == LM85_COMPANY_NATIONAL) {
 		switch (verstep) {
 		case LM85_VERSTEP_LM85C:
@@ -1323,6 +1310,7 @@
 					"Found Winbond WPCD377I, ignoring\n");
 				return -ENODEV;
 			}
+			type_name = "lm85";
 			break;
 		}
 	} else if (company == LM85_COMPANY_ANALOG_DEV) {
@@ -1357,12 +1345,11 @@
 			type_name = "emc6d103s";
 			break;
 		}
-	} else {
-		dev_dbg(&adapter->dev,
-			"Autodetection failed: unknown vendor\n");
-		return -ENODEV;
 	}
 
+	if (!type_name)
+		return -ENODEV;
+
 	strlcpy(info->type, type_name, I2C_NAME_SIZE);
 
 	return 0;
diff --git a/drivers/hwmon/ltc4151.c b/drivers/hwmon/ltc4151.c
index af81be1..c86a184 100644
--- a/drivers/hwmon/ltc4151.c
+++ b/drivers/hwmon/ltc4151.c
@@ -47,7 +47,7 @@
 #define LTC4151_ADIN_L	0x05
 
 struct ltc4151_data {
-	struct device *hwmon_dev;
+	struct i2c_client *client;
 
 	struct mutex update_lock;
 	bool valid;
@@ -59,8 +59,8 @@
 
 static struct ltc4151_data *ltc4151_update_device(struct device *dev)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ltc4151_data *data = i2c_get_clientdata(client);
+	struct ltc4151_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
 	struct ltc4151_data *ret = data;
 
 	mutex_lock(&data->update_lock);
@@ -159,7 +159,7 @@
  * Finally, construct an array of pointers to members of the above objects,
  * as required for sysfs_create_group()
  */
-static struct attribute *ltc4151_attributes[] = {
+static struct attribute *ltc4151_attrs[] = {
 	&sensor_dev_attr_in1_input.dev_attr.attr,
 	&sensor_dev_attr_in2_input.dev_attr.attr,
 
@@ -167,54 +167,30 @@
 
 	NULL,
 };
-
-static const struct attribute_group ltc4151_group = {
-	.attrs = ltc4151_attributes,
-};
+ATTRIBUTE_GROUPS(ltc4151);
 
 static int ltc4151_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = client->adapter;
+	struct device *dev = &client->dev;
 	struct ltc4151_data *data;
-	int ret;
+	struct device *hwmon_dev;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -ENODEV;
 
-	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	i2c_set_clientdata(client, data);
+	data->client = client;
 	mutex_init(&data->update_lock);
 
-	/* Register sysfs hooks */
-	ret = sysfs_create_group(&client->dev.kobj, &ltc4151_group);
-	if (ret)
-		return ret;
-
-	data->hwmon_dev = hwmon_device_register(&client->dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		ret = PTR_ERR(data->hwmon_dev);
-		goto out_hwmon_device_register;
-	}
-
-	return 0;
-
-out_hwmon_device_register:
-	sysfs_remove_group(&client->dev.kobj, &ltc4151_group);
-	return ret;
-}
-
-static int ltc4151_remove(struct i2c_client *client)
-{
-	struct ltc4151_data *data = i2c_get_clientdata(client);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	sysfs_remove_group(&client->dev.kobj, &ltc4151_group);
-
-	return 0;
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
+							   data,
+							   ltc4151_groups);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
 static const struct i2c_device_id ltc4151_id[] = {
@@ -229,7 +205,6 @@
 		.name	= "ltc4151",
 	},
 	.probe		= ltc4151_probe,
-	.remove		= ltc4151_remove,
 	.id_table	= ltc4151_id,
 };
 
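The ltc4151 conversion above is an instance of a common hwmon pattern; reduced to a skeleton it looks like the following (a sketch only: the foo_* names are illustrative and the attribute definitions are elided):

#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/i2c.h>
#include <linux/sysfs.h>

struct foo_data {
	struct i2c_client *client;
};

static struct attribute *foo_attrs[] = {
	/* ... &sensor_dev_attr_in1_input.dev_attr.attr, ... */
	NULL,
};
ATTRIBUTE_GROUPS(foo);			/* emits foo_groups[] */

static int foo_probe(struct i2c_client *client,
		     const struct i2c_device_id *id)
{
	struct foo_data *data;
	struct device *hwmon_dev;

	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	data->client = client;

	/* sysfs groups and the hwmon device are torn down automatically
	 * on unbind, so the driver no longer needs a .remove callback */
	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
					client->name, data, foo_groups);
	return PTR_ERR_OR_ZERO(hwmon_dev);
}
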
diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
new file mode 100644
index 0000000..decd7df
--- /dev/null
+++ b/drivers/hwmon/shtc1.c
@@ -0,0 +1,251 @@
+/* Sensirion SHTC1 humidity and temperature sensor driver
+ *
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/platform_data/shtc1.h>
+
+/* commands (high precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_hpm[]    = { 0x7C, 0xA2 };
+static const unsigned char shtc1_cmd_measure_nonblocking_hpm[] = { 0x78, 0x66 };
+
+/* commands (low precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_lpm[]    = { 0x64, 0x58 };
+static const unsigned char shtc1_cmd_measure_nonblocking_lpm[] = { 0x60, 0x9c };
+
+/* command for reading the ID register */
+static const unsigned char shtc1_cmd_read_id_reg[]	       = { 0xef, 0xc8 };
+
+/* constants for reading the ID register */
+#define SHTC1_ID	  0x07
+#define SHTC1_ID_REG_MASK 0x1f
+
+/* delays for non-blocking i2c commands, both in us */
+#define SHTC1_NONBLOCKING_WAIT_TIME_HPM  14400
+#define SHTC1_NONBLOCKING_WAIT_TIME_LPM   1000
+
+#define SHTC1_CMD_LENGTH      2
+#define SHTC1_RESPONSE_LENGTH 6
+
+struct shtc1_data {
+	struct i2c_client *client;
+	struct mutex update_lock;
+	bool valid;
+	unsigned long last_updated; /* in jiffies */
+
+	const unsigned char *command;
+	unsigned int nonblocking_wait_time; /* in us */
+
+	struct shtc1_platform_data setup;
+
+	int temperature; /* 1000 * temperature in deg C */
+	int humidity; /* 1000 * relative humidity in %RH */
+};
+
+static int shtc1_update_values(struct i2c_client *client,
+			       struct shtc1_data *data,
+			       char *buf, int bufsize)
+{
+	int ret = i2c_master_send(client, data->command, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(&client->dev, "failed to send command: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	/*
+	 * In blocking mode (clock stretching mode) the I2C bus
+	 * is blocked for other traffic, thus the call to i2c_master_recv()
+	 * will wait until the data is ready. In non-blocking mode, we
+	 * have to wait ourselves.
+	 */
+	if (!data->setup.blocking_io)
+		usleep_range(data->nonblocking_wait_time,
+			     data->nonblocking_wait_time + 1000);
+
+	ret = i2c_master_recv(client, buf, bufsize);
+	if (ret != bufsize) {
+		dev_err(&client->dev, "failed to read values: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return 0;
+}
+
+/* sysfs attributes */
+static struct shtc1_data *shtc1_update_client(struct device *dev)
+{
+	struct shtc1_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	unsigned char buf[SHTC1_RESPONSE_LENGTH];
+	int val;
+	int ret = 0;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) {
+		ret = shtc1_update_values(client, data, buf, sizeof(buf));
+		if (ret)
+			goto out;
+
+		/*
+		 * From datasheet:
+		 * T = -45 + 175 * ST / 2^16
+		 * RH = 100 * SRH / 2^16
+		 *
+		 * Adapted for integer fixed point (3 digit) arithmetic.
+		 */
+		val = be16_to_cpup((__be16 *)buf);
+		data->temperature = ((21875 * val) >> 13) - 45000;
+		val = be16_to_cpup((__be16 *)(buf + 3));
+		data->humidity = ((12500 * val) >> 13);
+
+		data->last_updated = jiffies;
+		data->valid = true;
+	}
+
+out:
+	mutex_unlock(&data->update_lock);
+
+	return ret == 0 ? data : ERR_PTR(ret);
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->temperature);
+}
+
+static ssize_t humidity1_input_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->humidity);
+}
+
+static DEVICE_ATTR_RO(temp1_input);
+static DEVICE_ATTR_RO(humidity1_input);
+
+static struct attribute *shtc1_attrs[] = {
+	&dev_attr_temp1_input.attr,
+	&dev_attr_humidity1_input.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(shtc1);
+
+static void shtc1_select_command(struct shtc1_data *data)
+{
+	if (data->setup.high_precision) {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_hpm :
+				shtc1_cmd_measure_nonblocking_hpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_HPM;
+
+	} else {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_lpm :
+				shtc1_cmd_measure_nonblocking_lpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_LPM;
+	}
+}
+
+static int shtc1_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	int ret;
+	char id_reg[2];
+	struct shtc1_data *data;
+	struct device *hwmon_dev;
+	struct i2c_adapter *adap = client->adapter;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) {
+		dev_err(dev, "plain i2c transactions not supported\n");
+		return -ENODEV;
+	}
+
+	ret = i2c_master_send(client, shtc1_cmd_read_id_reg, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(dev, "could not send read_id_reg command: %d\n", ret);
+		return ret < 0 ? ret : -ENODEV;
+	}
+	ret = i2c_master_recv(client, id_reg, sizeof(id_reg));
+	if (ret != sizeof(id_reg)) {
+		dev_err(dev, "could not read ID register: %d\n", ret);
+		return -ENODEV;
+	}
+	if ((id_reg[1] & SHTC1_ID_REG_MASK) != SHTC1_ID) {
+		dev_err(dev, "ID register doesn't match\n");
+		return -ENODEV;
+	}
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->setup.blocking_io = false;
+	data->setup.high_precision = true;
+	data->client = client;
+
+	if (client->dev.platform_data)
+		data->setup = *(struct shtc1_platform_data *)dev->platform_data;
+	shtc1_select_command(data);
+	mutex_init(&data->update_lock);
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+							   client->name,
+							   data,
+							   shtc1_groups);
+	if (IS_ERR(hwmon_dev))
+		dev_dbg(dev, "unable to register hwmon device\n");
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+/* device ID table */
+static const struct i2c_device_id shtc1_id[] = {
+	{ "shtc1", 0 },
+	{ "shtw1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, shtc1_id);
+
+static struct i2c_driver shtc1_i2c_driver = {
+	.driver.name  = "shtc1",
+	.probe        = shtc1_probe,
+	.id_table     = shtc1_id,
+};
+
+module_i2c_driver(shtc1_i2c_driver);
+
+MODULE_AUTHOR("Johannes Winkelmann <johannes.winkelmann@sensirion.com>");
+MODULE_DESCRIPTION("Sensirion SHTC1 humidity and temperature sensor driver");
+MODULE_LICENSE("GPL");
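
To spell out the fixed-point conversion used in shtc1_update_client() above: since 175000/65536 == 21875/8192 and 100000/65536 == 12500/8192, the datasheet formulas scale to milli-units with a single multiply and a 13-bit shift. A stand-alone check with an arbitrary raw reading (0x6000 is chosen for illustration, not from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int st = 0x6000, srh = 0x6000;	/* arbitrary raw readings */

	/* T[mdegC] = 1000 * (-45 + 175 * ST / 65536)
	 * RH[m%]   = 1000 * (100 * SRH / 65536) */
	printf("temp = %d mdegC\n", (int)((21875 * st) >> 13) - 45000);
	printf("rh   = %u m%%RH\n", (12500 * srh) >> 13);
	return 0;	/* prints 20625 mdegC and 37500 m%RH */
}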
diff --git a/drivers/hwmon/vexpress.c b/drivers/hwmon/vexpress.c
index 611f34c..c536190 100644
--- a/drivers/hwmon/vexpress.c
+++ b/drivers/hwmon/vexpress.c
@@ -27,17 +27,8 @@
 struct vexpress_hwmon_data {
 	struct device *hwmon_dev;
 	struct regmap *reg;
-	const char *name;
 };
 
-static ssize_t vexpress_hwmon_name_show(struct device *dev,
-		struct device_attribute *dev_attr, char *buffer)
-{
-	struct vexpress_hwmon_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buffer, "%s\n", data->name);
-}
-
 static ssize_t vexpress_hwmon_label_show(struct device *dev,
 		struct device_attribute *dev_attr, char *buffer)
 {
@@ -95,16 +86,6 @@
 	return attr->mode;
 }
 
-static DEVICE_ATTR(name, S_IRUGO, vexpress_hwmon_name_show, NULL);
-
-#define VEXPRESS_HWMON_ATTRS(_name, _label_attr, _input_attr)	\
-struct attribute *vexpress_hwmon_attrs_##_name[] = {		\
-	&dev_attr_name.attr,					\
-	&dev_attr_##_label_attr.attr,				\
-	&sensor_dev_attr_##_input_attr.dev_attr.attr,		\
-	NULL							\
-}
-
 struct vexpress_hwmon_type {
 	const char *name;
 	const struct attribute_group **attr_groups;
@@ -114,7 +95,11 @@
 static DEVICE_ATTR(in1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(volt, in1_label, in1_input);
+static struct attribute *vexpress_hwmon_attrs_volt[] = {
+	&dev_attr_in1_label.attr,
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_volt = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_volt,
@@ -131,7 +116,11 @@
 static DEVICE_ATTR(curr1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(amp, curr1_label, curr1_input);
+static struct attribute *vexpress_hwmon_attrs_amp[] = {
+	&dev_attr_curr1_label.attr,
+	&sensor_dev_attr_curr1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_amp = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_amp,
@@ -147,7 +136,11 @@
 static DEVICE_ATTR(temp1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1000);
-static VEXPRESS_HWMON_ATTRS(temp, temp1_label, temp1_input);
+static struct attribute *vexpress_hwmon_attrs_temp[] = {
+	&dev_attr_temp1_label.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_temp = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_temp,
@@ -163,7 +156,11 @@
 static DEVICE_ATTR(power1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, vexpress_hwmon_u32_show,
 		NULL, 1);
-static VEXPRESS_HWMON_ATTRS(power, power1_label, power1_input);
+static struct attribute *vexpress_hwmon_attrs_power[] = {
+	&dev_attr_power1_label.attr,
+	&sensor_dev_attr_power1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_power = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_power,
@@ -179,7 +176,11 @@
 static DEVICE_ATTR(energy1_label, S_IRUGO, vexpress_hwmon_label_show, NULL);
 static SENSOR_DEVICE_ATTR(energy1_input, S_IRUGO, vexpress_hwmon_u64_show,
 		NULL, 1);
-static VEXPRESS_HWMON_ATTRS(energy, energy1_label, energy1_input);
+static struct attribute *vexpress_hwmon_attrs_energy[] = {
+	&dev_attr_energy1_label.attr,
+	&sensor_dev_attr_energy1_input.dev_attr.attr,
+	NULL
+};
 static struct attribute_group vexpress_hwmon_group_energy = {
 	.is_visible = vexpress_hwmon_attr_is_visible,
 	.attrs = vexpress_hwmon_attrs_energy,
@@ -218,7 +219,6 @@
 
 static int vexpress_hwmon_probe(struct platform_device *pdev)
 {
-	int err;
 	const struct of_device_id *match;
 	struct vexpress_hwmon_data *data;
 	const struct vexpress_hwmon_type *type;
@@ -232,45 +232,19 @@
 	if (!match)
 		return -ENODEV;
 	type = match->data;
-	data->name = type->name;
 
 	data->reg = devm_regmap_init_vexpress_config(&pdev->dev);
 	if (IS_ERR(data->reg))
 		return PTR_ERR(data->reg);
 
-	err = sysfs_create_groups(&pdev->dev.kobj, type->attr_groups);
-	if (err)
-		goto error;
+	data->hwmon_dev = devm_hwmon_device_register_with_groups(&pdev->dev,
+			type->name, data, type->attr_groups);
 
-	data->hwmon_dev = hwmon_device_register(&pdev->dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		err = PTR_ERR(data->hwmon_dev);
-		goto error;
-	}
-
-	return 0;
-
-error:
-	sysfs_remove_group(&pdev->dev.kobj, match->data);
-	return err;
-}
-
-static int vexpress_hwmon_remove(struct platform_device *pdev)
-{
-	struct vexpress_hwmon_data *data = platform_get_drvdata(pdev);
-	const struct of_device_id *match;
-
-	hwmon_device_unregister(data->hwmon_dev);
-
-	match = of_match_device(vexpress_hwmon_of_match, &pdev->dev);
-	sysfs_remove_group(&pdev->dev.kobj, match->data);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(data->hwmon_dev);
 }
 
 static struct platform_driver vexpress_hwmon_driver = {
 	.probe = vexpress_hwmon_probe,
-	.remove = vexpress_hwmon_remove,
 	.driver	= {
 		.name = DRVNAME,
 		.owner = THIS_MODULE,
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2e2d903..8d44a40 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -41,11 +41,11 @@
 #include "iscsi_iser.h"
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  iser_task->data[ISER_DIR_IN].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_IN].data_len; protection size
+ *  is stored in task->prot[ISER_DIR_IN].data_len
  */
-static int iser_prepare_read_cmd(struct iscsi_task *task,
-				 unsigned int edtl)
+static int iser_prepare_read_cmd(struct iscsi_task *task)
 
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
@@ -73,14 +73,6 @@
 			return err;
 	}
 
-	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: "
-			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_IN].data_len, edtl,
-			 task->itt, iser_task->ib_conn);
-		return -EINVAL;
-	}
-
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
 	if (err) {
 		iser_err("Failed to set up Data-IN RDMA\n");
@@ -100,8 +92,9 @@
 }
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  task->data[ISER_DIR_OUT].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_OUT].data_len; protection size
+ *  is stored in task->prot[ISER_DIR_OUT].data_len
  */
 static int
 iser_prepare_write_cmd(struct iscsi_task *task,
@@ -135,14 +128,6 @@
 			return err;
 	}
 
-	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: %d, "
-			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_OUT].data_len,
-			 edtl, task->itt, task->conn);
-		return -EINVAL;
-	}
-
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
 	if (err != 0) {
 		iser_err("Failed to register write cmd RDMA mem\n");
@@ -417,11 +402,12 @@
 	if (scsi_prot_sg_count(sc)) {
 		prot_buf->buf  = scsi_prot_sglist(sc);
 		prot_buf->size = scsi_prot_sg_count(sc);
-		prot_buf->data_len = sc->prot_sdb->length;
+		prot_buf->data_len = (data_buf->data_len >>
+				      ilog2(sc->device->sector_size)) * 8;
 	}
 
 	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
-		err = iser_prepare_read_cmd(task, edtl);
+		err = iser_prepare_read_cmd(task);
 		if (err)
 			goto send_command_error;
 	}
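
On the protection-length computation above: T10-DIF carries one 8-byte guard/app/ref tuple per sector, so the protection length is (data_len >> ilog2(sector_size)) * 8 bytes; note the shift must happen before the multiply. A stand-alone check with assumed sizes:

#include <stdio.h>

/* log2 for exact powers of two, standing in for the kernel's ilog2() */
static unsigned int ilog2_pow2(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long data_len = 4096;		/* hypothetical 4 KiB transfer */
	unsigned long sector_size = 512;	/* 8 sectors */

	/* one 8-byte DIF tuple per sector: 8 * 8 = 64 bytes */
	printf("%lu\n", (data_len >> ilog2_pow2(sector_size)) * 8);
	return 0;
}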
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b9d6474..d4c7928 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -663,8 +663,9 @@
 
 	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
 	if (pi_support && !device->pi_capable) {
-		pr_err("Protection information requested but not supported\n");
-		ret = -EINVAL;
+		pr_err("Protection information requested but not supported, "
+		       "rejecting connect request\n");
+		ret = rdma_reject(cma_id, NULL, 0);
 		goto out_mr;
 	}
 
@@ -787,14 +788,12 @@
 		isert_put_conn(isert_conn);
 		return;
 	}
-	if (!isert_conn->logout_posted) {
-		pr_debug("Calling rdma_disconnect for !logout_posted from"
-			 " isert_disconnect_work\n");
+
+	if (isert_conn->disconnect) {
+		/* Send DREQ/DREP towards our initiator */
 		rdma_disconnect(isert_conn->conn_cm_id);
-		mutex_unlock(&isert_conn->conn_mutex);
-		iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
-		goto wake_up;
 	}
+
 	mutex_unlock(&isert_conn->conn_mutex);
 
 wake_up:
@@ -803,10 +802,11 @@
 }
 
 static void
-isert_disconnected_handler(struct rdma_cm_id *cma_id)
+isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
 {
 	struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context;
 
+	isert_conn->disconnect = disconnect;
 	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
 	schedule_work(&isert_conn->conn_logout_work);
 }
@@ -815,29 +815,28 @@
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	int ret = 0;
+	bool disconnect = false;
 
 	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
 		 event->event, event->status, cma_id->context, cma_id);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
-		pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n");
 		ret = isert_connect_request(cma_id, event);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
-		pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n");
 		isert_connected_handler(cma_id);
 		break;
-	case RDMA_CM_EVENT_DISCONNECTED:
-		pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n");
-		isert_disconnected_handler(cma_id);
-		break;
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
+	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
+	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
+		disconnect = true;
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
+		isert_disconnected_handler(cma_id, disconnect);
 		break;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	default:
-		pr_err("Unknown RDMA CMA event: %d\n", event->event);
+		pr_err("Unhandled RDMA CMA event: %d\n", event->event);
 		break;
 	}
 
@@ -1054,7 +1053,9 @@
 	}
 	if (!login->login_failed) {
 		if (login->login_complete) {
-			if (isert_conn->conn_device->use_fastreg) {
+			if (!conn->sess->sess_ops->SessionType &&
+			    isert_conn->conn_device->use_fastreg) {
+				/* Normal Session and fastreg is used */
 				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
 
 				ret = isert_conn_create_fastreg_pool(isert_conn,
@@ -1824,11 +1825,8 @@
 		break;
 	case ISTATE_SEND_LOGOUTRSP:
 		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
-		/*
-		 * Call atomic_dec(&isert_conn->post_send_buf_count)
-		 * from isert_wait_conn()
-		 */
-		isert_conn->logout_posted = true;
+
+		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_logout_post_handler(cmd, cmd->conn);
 		break;
 	case ISTATE_SEND_TEXTRSP:
@@ -2034,6 +2032,8 @@
 	isert_conn->state = ISER_CONN_DOWN;
 	mutex_unlock(&isert_conn->conn_mutex);
 
+	iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+
 	complete(&isert_conn->conn_wait_comp_err);
 }
 
@@ -2320,7 +2320,7 @@
 	int rc;
 
 	isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND);
 	if (rc < 0)
 		return rc;
 
@@ -3156,9 +3156,14 @@
 		return -ENODEV;
 
 	spin_lock_bh(&np->np_thread_lock);
-	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("ISCSI_NP_THREAD_RESET for isert_accept_np\n");
+		pr_debug("np_thread_state %d for isert_accept_np\n",
+			 np->np_thread_state);
+		/*
+		 * No point in stalling here when np_thread
+		 * is in state RESET/SHUTDOWN/EXIT - bail
+		 */
 		return -ENODEV;
 	}
 	spin_unlock_bh(&np->np_thread_lock);
@@ -3208,15 +3213,9 @@
 	struct isert_conn *isert_conn = conn->context;
 
 	pr_debug("isert_wait_conn: Starting \n");
-	/*
-	 * Decrement post_send_buf_count for special case when called
-	 * from isert_do_control_comp() -> iscsit_logout_post_handler()
-	 */
-	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->logout_posted)
-		atomic_dec(&isert_conn->post_send_buf_count);
 
-	if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) {
+	mutex_lock(&isert_conn->conn_mutex);
+	if (isert_conn->conn_cm_id) {
 		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
 		rdma_disconnect(isert_conn->conn_cm_id);
 	}
@@ -3293,6 +3292,7 @@
 
 static void __exit isert_exit(void)
 {
+	flush_scheduled_work();
 	destroy_workqueue(isert_comp_wq);
 	destroy_workqueue(isert_rx_wq);
 	iscsit_unregister_transport(&iser_target_transport);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index da6612e..04f51f7 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -116,7 +116,6 @@
 
 struct isert_conn {
 	enum iser_conn_state	state;
-	bool			logout_posted;
 	int			post_recv_buf_count;
 	atomic_t		post_send_buf_count;
 	u32			responder_resources;
@@ -151,6 +150,7 @@
 #define ISERT_COMP_BATCH_COUNT	8
 	int			conn_comp_batch;
 	struct llist_head	conn_comp_llist;
+	bool                    disconnect;
 };
 
 #define ISERT_MAX_CQ 64
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 20f1655..8108c69 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -93,7 +93,9 @@
 
 config VIDEO_OMAP3
 	tristate "OMAP 3 Camera support"
-	depends on OMAP_IOVMM && VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3
+	depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3
+	select ARM_DMA_USE_IOMMU
+	select OMAP_IOMMU
 	---help---
 	  Driver for an OMAP 3 camera controller.
 
diff --git a/drivers/media/platform/omap3isp/Makefile b/drivers/media/platform/omap3isp/Makefile
index e8847e7..254975a 100644
--- a/drivers/media/platform/omap3isp/Makefile
+++ b/drivers/media/platform/omap3isp/Makefile
@@ -3,7 +3,7 @@
 ccflags-$(CONFIG_VIDEO_OMAP3_DEBUG) += -DDEBUG
 
 omap3-isp-objs += \
-	isp.o ispqueue.o ispvideo.o \
+	isp.o ispvideo.o \
 	ispcsiphy.o ispccp2.o ispcsi2.o \
 	ispccdc.o isppreview.o ispresizer.o \
 	ispstat.o isph3a_aewb.o isph3a_af.o isphist.o
diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 06a0df4..2c7aa67 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -69,6 +69,8 @@
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 
+#include <asm/dma-iommu.h>
+
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
 
@@ -1397,14 +1399,14 @@
 	if (isp_pipeline_is_last(me)) {
 		struct isp_video *video = pipe->output;
 		unsigned long flags;
-		spin_lock_irqsave(&video->queue->irqlock, flags);
+		spin_lock_irqsave(&video->irqlock, flags);
 		if (video->dmaqueue_flags & ISP_VIDEO_DMAQUEUE_UNDERRUN) {
-			spin_unlock_irqrestore(&video->queue->irqlock, flags);
+			spin_unlock_irqrestore(&video->irqlock, flags);
 			atomic_set(stopping, 0);
 			smp_mb();
 			return 0;
 		}
-		spin_unlock_irqrestore(&video->queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		if (!wait_event_timeout(*wait, !atomic_read(stopping),
 					msecs_to_jiffies(1000))) {
 			atomic_set(stopping, 0);
@@ -1625,7 +1627,7 @@
  * Decrement the reference count on the ISP. If the last reference is released,
  * power-down all submodules, disable clocks and free temporary buffers.
  */
-void omap3isp_put(struct isp_device *isp)
+static void __omap3isp_put(struct isp_device *isp, bool save_ctx)
 {
 	if (isp == NULL)
 		return;
@@ -1634,7 +1636,7 @@
 	BUG_ON(isp->ref_count == 0);
 	if (--isp->ref_count == 0) {
 		isp_disable_interrupts(isp);
-		if (isp->domain) {
+		if (save_ctx) {
 			isp_save_ctx(isp);
 			isp->has_context = 1;
 		}
@@ -1648,6 +1650,11 @@
 	mutex_unlock(&isp->isp_mutex);
 }
 
+void omap3isp_put(struct isp_device *isp)
+{
+	__omap3isp_put(isp, true);
+}
+
 /* --------------------------------------------------------------------------
  * Platform device driver
  */
@@ -2120,6 +2127,61 @@
 	return ret;
 }
 
+static void isp_detach_iommu(struct isp_device *isp)
+{
+	arm_iommu_release_mapping(isp->mapping);
+	isp->mapping = NULL;
+	iommu_group_remove_device(isp->dev);
+}
+
+static int isp_attach_iommu(struct isp_device *isp)
+{
+	struct dma_iommu_mapping *mapping;
+	struct iommu_group *group;
+	int ret;
+
+	/* Create a device group and add the device to it. */
+	group = iommu_group_alloc();
+	if (IS_ERR(group)) {
+		dev_err(isp->dev, "failed to allocate IOMMU group\n");
+		return PTR_ERR(group);
+	}
+
+	ret = iommu_group_add_device(group, isp->dev);
+	iommu_group_put(group);
+
+	if (ret < 0) {
+		dev_err(isp->dev, "failed to add device to IPMMU group\n");
+		return ret;
+	}
+
+	/*
+	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
+	 * VAs. This will allocate a corresponding IOMMU domain.
+	 */
+	mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G);
+	if (IS_ERR(mapping)) {
+		dev_err(isp->dev, "failed to create ARM IOMMU mapping\n");
+		ret = PTR_ERR(mapping);
+		goto error;
+	}
+
+	isp->mapping = mapping;
+
+	/* Attach the ARM VA mapping to the device. */
+	ret = arm_iommu_attach_device(isp->dev, mapping);
+	if (ret < 0) {
+		dev_err(isp->dev, "failed to attach device to VA mapping\n");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	isp_detach_iommu(isp);
+	return ret;
+}
+
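The two helpers above follow the standard ARM DMA/IOMMU bring-up sequence. Once arm_iommu_attach_device() has succeeded, the rest of the driver can use the generic DMA API and transparently get buffers that are contiguous in the device's IOMMU address space. A minimal sketch of what attached callers gain (the helper name below is illustrative, not part of this patch):

	#include <linux/dma-mapping.h>

	/* After arm_iommu_attach_device(), a plain dma_alloc_coherent() call
	 * returns memory that is contiguous in the ISP's address space even
	 * when it is physically scattered; the returned handle falls inside
	 * the SZ_1G..SZ_1G+SZ_2G window configured by
	 * arm_iommu_create_mapping() above. */
	static void *isp_example_alloc(struct device *dev, size_t size,
				       dma_addr_t *dma)
	{
		return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
	}
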
 /*
  * isp_remove - Remove ISP platform device
  * @pdev: Pointer to ISP platform device
@@ -2135,10 +2197,8 @@
 	isp_xclk_cleanup(isp);
 
 	__omap3isp_get(isp, false);
-	iommu_detach_device(isp->domain, &pdev->dev);
-	iommu_domain_free(isp->domain);
-	isp->domain = NULL;
-	omap3isp_put(isp);
+	isp_detach_iommu(isp);
+	__omap3isp_put(isp, false);
 
 	return 0;
 }
@@ -2265,39 +2325,32 @@
 		}
 	}
 
-	isp->domain = iommu_domain_alloc(pdev->dev.bus);
-	if (!isp->domain) {
-		dev_err(isp->dev, "can't alloc iommu domain\n");
-		ret = -ENOMEM;
+	/* IOMMU */
+	ret = isp_attach_iommu(isp);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to attach to IOMMU\n");
 		goto error_isp;
 	}
 
-	ret = iommu_attach_device(isp->domain, &pdev->dev);
-	if (ret) {
-		dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret);
-		ret = -EPROBE_DEFER;
-		goto free_domain;
-	}
-
 	/* Interrupt */
 	isp->irq_num = platform_get_irq(pdev, 0);
 	if (isp->irq_num <= 0) {
 		dev_err(isp->dev, "No IRQ resource\n");
 		ret = -ENODEV;
-		goto detach_dev;
+		goto error_iommu;
 	}
 
 	if (devm_request_irq(isp->dev, isp->irq_num, isp_isr, IRQF_SHARED,
 			     "OMAP3 ISP", isp)) {
 		dev_err(isp->dev, "Unable to request IRQ\n");
 		ret = -EINVAL;
-		goto detach_dev;
+		goto error_iommu;
 	}
 
 	/* Entities */
 	ret = isp_initialize_modules(isp);
 	if (ret < 0)
-		goto detach_dev;
+		goto error_iommu;
 
 	ret = isp_register_entities(isp);
 	if (ret < 0)
@@ -2310,14 +2363,11 @@
 
 error_modules:
 	isp_cleanup_modules(isp);
-detach_dev:
-	iommu_detach_device(isp->domain, &pdev->dev);
-free_domain:
-	iommu_domain_free(isp->domain);
-	isp->domain = NULL;
+error_iommu:
+	isp_detach_iommu(isp);
 error_isp:
 	isp_xclk_cleanup(isp);
-	omap3isp_put(isp);
+	__omap3isp_put(isp, false);
 error:
 	mutex_destroy(&isp->isp_mutex);
 
diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h
index 6d5e697..2c314ee 100644
--- a/drivers/media/platform/omap3isp/isp.h
+++ b/drivers/media/platform/omap3isp/isp.h
@@ -45,8 +45,6 @@
 #include "ispcsi2.h"
 #include "ispccp2.h"
 
-#define IOMMU_FLAG (IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8)
-
 #define ISP_TOK_TERM		0xFFFFFFFF	/*
 						 * terminating token for ISP
 						 * modules reg list
@@ -152,6 +150,7 @@
  *             regions.
  * @mmio_base_phys: Array with physical L4 bus addresses for ISP register
  *                  regions.
+ * @mapping: IOMMU mapping
  * @stat_lock: Spinlock for handling statistics
  * @isp_mutex: Mutex for serializing requests to ISP.
  * @stop_failure: Indicates that an entity failed to stop.
@@ -171,7 +170,6 @@
  * @isp_res: Pointer to current settings for ISP Resizer.
  * @isp_prev: Pointer to current settings for ISP Preview.
  * @isp_ccdc: Pointer to current settings for ISP CCDC.
- * @iommu: Pointer to requested IOMMU instance for ISP.
  * @platform_cb: ISP driver callback function pointers for platform code
  *
  * This structure is used to store the OMAP ISP Information.
@@ -189,6 +187,8 @@
 	void __iomem *mmio_base[OMAP3_ISP_IOMEM_LAST];
 	unsigned long mmio_base_phys[OMAP3_ISP_IOMEM_LAST];
 
+	struct dma_iommu_mapping *mapping;
+
 	/* ISP Obj */
 	spinlock_t stat_lock;	/* common lock for statistic drivers */
 	struct mutex isp_mutex;	/* For handling ref_count field */
@@ -219,8 +219,6 @@
 
 	unsigned int sbl_resources;
 	unsigned int subclk_resources;
-
-	struct iommu_domain *domain;
 };
 
 #define v4l2_dev_to_isp_device(dev) \
diff --git a/drivers/media/platform/omap3isp/ispccdc.c b/drivers/media/platform/omap3isp/ispccdc.c
index 4d920c8..9f727d2 100644
--- a/drivers/media/platform/omap3isp/ispccdc.c
+++ b/drivers/media/platform/omap3isp/ispccdc.c
@@ -30,7 +30,6 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
-#include <linux/omap-iommu.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <media/v4l2-event.h>
@@ -206,7 +205,8 @@
  * ccdc_lsc_program_table - Program Lens Shading Compensation table address.
  * @ccdc: Pointer to ISP CCDC device.
  */
-static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc, u32 addr)
+static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc,
+				   dma_addr_t addr)
 {
 	isp_reg_writel(to_isp_device(ccdc), addr,
 		       OMAP3_ISP_IOMEM_CCDC, ISPCCDC_LSC_TABLE_BASE);
@@ -333,7 +333,7 @@
 		return -EBUSY;
 
 	ccdc_lsc_setup_regs(ccdc, &req->config);
-	ccdc_lsc_program_table(ccdc, req->table);
+	ccdc_lsc_program_table(ccdc, req->table.dma);
 	return 0;
 }
 
@@ -368,11 +368,12 @@
 	if (req == NULL)
 		return;
 
-	if (req->iovm)
-		dma_unmap_sg(isp->dev, req->iovm->sgt->sgl,
-			     req->iovm->sgt->nents, DMA_TO_DEVICE);
-	if (req->table)
-		omap_iommu_vfree(isp->domain, isp->dev, req->table);
+	if (req->table.addr) {
+		sg_free_table(&req->table.sgt);
+		dma_free_coherent(isp->dev, req->config.size, req->table.addr,
+				  req->table.dma);
+	}
+
 	kfree(req);
 }
 
@@ -416,7 +417,6 @@
 	struct isp_device *isp = to_isp_device(ccdc);
 	struct ispccdc_lsc_config_req *req;
 	unsigned long flags;
-	void *table;
 	u16 update;
 	int ret;
 
@@ -444,38 +444,31 @@
 
 		req->enable = 1;
 
-		req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
-					req->config.size, IOMMU_FLAG);
-		if (IS_ERR_VALUE(req->table)) {
-			req->table = 0;
+		req->table.addr = dma_alloc_coherent(isp->dev, req->config.size,
+						     &req->table.dma,
+						     GFP_KERNEL);
+		if (req->table.addr == NULL) {
 			ret = -ENOMEM;
 			goto done;
 		}
 
-		req->iovm = omap_find_iovm_area(isp->dev, req->table);
-		if (req->iovm == NULL) {
-			ret = -ENOMEM;
+		ret = dma_get_sgtable(isp->dev, &req->table.sgt,
+				      req->table.addr, req->table.dma,
+				      req->config.size);
+		if (ret < 0)
 			goto done;
-		}
 
-		if (!dma_map_sg(isp->dev, req->iovm->sgt->sgl,
-				req->iovm->sgt->nents, DMA_TO_DEVICE)) {
-			ret = -ENOMEM;
-			req->iovm = NULL;
-			goto done;
-		}
+		dma_sync_sg_for_cpu(isp->dev, req->table.sgt.sgl,
+				    req->table.sgt.nents, DMA_TO_DEVICE);
 
-		dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl,
-				    req->iovm->sgt->nents, DMA_TO_DEVICE);
-
-		table = omap_da_to_va(isp->dev, req->table);
-		if (copy_from_user(table, config->lsc, req->config.size)) {
+		if (copy_from_user(req->table.addr, config->lsc,
+				   req->config.size)) {
 			ret = -EFAULT;
 			goto done;
 		}
 
-		dma_sync_sg_for_device(isp->dev, req->iovm->sgt->sgl,
-				       req->iovm->sgt->nents, DMA_TO_DEVICE);
+		dma_sync_sg_for_device(isp->dev, req->table.sgt.sgl,
+				       req->table.sgt.nents, DMA_TO_DEVICE);
 	}
 
 	spin_lock_irqsave(&ccdc->lsc.req_lock, flags);
@@ -584,7 +577,7 @@
 	if (!ccdc->fpc_en)
 		return;
 
-	isp_reg_writel(isp, ccdc->fpc.fpcaddr, OMAP3_ISP_IOMEM_CCDC,
+	isp_reg_writel(isp, ccdc->fpc.dma, OMAP3_ISP_IOMEM_CCDC,
 		       ISPCCDC_FPC_ADDR);
 	/* The FPNUM field must be set before enabling FPC. */
 	isp_reg_writel(isp, (ccdc->fpc.fpnum << ISPCCDC_FPC_FPNUM_SHIFT),
@@ -724,8 +717,9 @@
 	ccdc->shadow_update = 0;
 
 	if (OMAP3ISP_CCDC_FPC & ccdc_struct->update) {
-		u32 table_old = 0;
-		u32 table_new;
+		struct omap3isp_ccdc_fpc fpc;
+		struct ispccdc_fpc fpc_old = { .addr = NULL, };
+		struct ispccdc_fpc fpc_new;
 		u32 size;
 
 		if (ccdc->state != ISP_PIPELINE_STREAM_STOPPED)
@@ -734,35 +728,39 @@
 		ccdc->fpc_en = !!(OMAP3ISP_CCDC_FPC & ccdc_struct->flag);
 
 		if (ccdc->fpc_en) {
-			if (copy_from_user(&ccdc->fpc, ccdc_struct->fpc,
-					   sizeof(ccdc->fpc)))
+			if (copy_from_user(&fpc, ccdc_struct->fpc, sizeof(fpc)))
 				return -EFAULT;
 
+			size = fpc.fpnum * 4;
+
 			/*
-			 * table_new must be 64-bytes aligned, but it's
-			 * already done by omap_iommu_vmalloc().
+			 * The table address must be 64-bytes aligned, which is
+			 * guaranteed by dma_alloc_coherent().
 			 */
-			size = ccdc->fpc.fpnum * 4;
-			table_new = omap_iommu_vmalloc(isp->domain, isp->dev,
-							0, size, IOMMU_FLAG);
-			if (IS_ERR_VALUE(table_new))
+			fpc_new.fpnum = fpc.fpnum;
+			fpc_new.addr = dma_alloc_coherent(isp->dev, size,
+							  &fpc_new.dma,
+							  GFP_KERNEL);
+			if (fpc_new.addr == NULL)
 				return -ENOMEM;
 
-			if (copy_from_user(omap_da_to_va(isp->dev, table_new),
-					   (__force void __user *)
-					   ccdc->fpc.fpcaddr, size)) {
-				omap_iommu_vfree(isp->domain, isp->dev,
-								table_new);
+			if (copy_from_user(fpc_new.addr,
+					   (__force void __user *)fpc.fpcaddr,
+					   size)) {
+				dma_free_coherent(isp->dev, size, fpc_new.addr,
+						  fpc_new.dma);
 				return -EFAULT;
 			}
 
-			table_old = ccdc->fpc.fpcaddr;
-			ccdc->fpc.fpcaddr = table_new;
+			fpc_old = ccdc->fpc;
+			ccdc->fpc = fpc_new;
 		}
 
 		ccdc_configure_fpc(ccdc);
-		if (table_old != 0)
-			omap_iommu_vfree(isp->domain, isp->dev, table_old);
+
+		if (fpc_old.addr != NULL)
+			dma_free_coherent(isp->dev, fpc_old.fpnum * 4,
+					  fpc_old.addr, fpc_old.dma);
 	}
 
 	return ccdc_lsc_config(ccdc, ccdc_struct);
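The FPC update builds the new table completely before touching the active one, so a failed copy_from_user() leaves the hardware state untouched; the old table is freed only after ccdc_configure_fpc() has pointed the hardware at the replacement. The idiom in isolation (a sketch, not the literal code above):

	struct ispccdc_fpc fpc_new, fpc_old;

	fpc_new.addr = dma_alloc_coherent(dev, size, &fpc_new.dma, GFP_KERNEL);
	/* on copy failure: free fpc_new and bail, ccdc->fpc still valid */
	fpc_old = ccdc->fpc;
	ccdc->fpc = fpc_new;		/* swap in the fresh table */
	ccdc_configure_fpc(ccdc);	/* reprogram the hardware */
	if (fpc_old.addr)
		dma_free_coherent(dev, fpc_old.fpnum * 4,
				  fpc_old.addr, fpc_old.dma);
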
@@ -1523,7 +1521,7 @@
 
 	buffer = omap3isp_video_buffer_next(&ccdc->video_out);
 	if (buffer != NULL) {
-		ccdc_set_outaddr(ccdc, buffer->isp_addr);
+		ccdc_set_outaddr(ccdc, buffer->dma);
 		restart = 1;
 	}
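From here on, every module programs buffer->dma, a dma_addr_t produced by the DMA mapping core, instead of the old isp_addr omap-iommu virtual address. The producing side lives in ispvideo.c, outside this section (see the videobuf2 sketch after the ispqueue.c removal below); consumers program the value into the module registers verbatim. A hypothetical view of the buffer structure this assumes:

	/* Assumed layout, for illustration only. */
	struct isp_buffer {
		struct vb2_buffer vb;	/* assumed framework embedding */
		dma_addr_t dma;		/* filled at buffer-prepare time */
	};
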
 
@@ -1662,7 +1660,7 @@
 	if (!(ccdc->output & CCDC_OUTPUT_MEMORY))
 		return -ENODEV;
 
-	ccdc_set_outaddr(ccdc, buffer->isp_addr);
+	ccdc_set_outaddr(ccdc, buffer->dma);
 
 	/* We now have a buffer queued on the output, restart the pipeline
 	 * on the next CCDC interrupt if running in continuous mode (or when
@@ -2580,8 +2578,9 @@
 	cancel_work_sync(&ccdc->lsc.table_work);
 	ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue);
 
-	if (ccdc->fpc.fpcaddr != 0)
-		omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr);
+	if (ccdc->fpc.addr != NULL)
+		dma_free_coherent(isp->dev, ccdc->fpc.fpnum * 4, ccdc->fpc.addr,
+				  ccdc->fpc.dma);
 
 	mutex_destroy(&ccdc->ioctl_lock);
 }
diff --git a/drivers/media/platform/omap3isp/ispccdc.h b/drivers/media/platform/omap3isp/ispccdc.h
index 9d24e41..f650616 100644
--- a/drivers/media/platform/omap3isp/ispccdc.h
+++ b/drivers/media/platform/omap3isp/ispccdc.h
@@ -46,6 +46,12 @@
 
 #define	OMAP3ISP_CCDC_NEVENTS	16
 
+struct ispccdc_fpc {
+	void *addr;
+	dma_addr_t dma;
+	unsigned int fpnum;
+};
+
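Keeping the CPU address, the DMA address and the element count together means every free site can recompute the allocation size (fpnum * 4 bytes) from the descriptor alone. A small helper along these lines would be possible (illustrative, not part of the patch):

	static void ispccdc_fpc_free(struct device *dev,
				     struct ispccdc_fpc *fpc)
	{
		if (fpc->addr)
			dma_free_coherent(dev, fpc->fpnum * sizeof(u32),
					  fpc->addr, fpc->dma);
		fpc->addr = NULL;
	}
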
 enum ispccdc_lsc_state {
 	LSC_STATE_STOPPED = 0,
 	LSC_STATE_STOPPING = 1,
@@ -57,8 +63,12 @@
 	struct list_head list;
 	struct omap3isp_ccdc_lsc_config config;
 	unsigned char enable;
-	u32 table;
-	struct iovm_struct *iovm;
+
+	struct {
+		void *addr;
+		dma_addr_t dma;
+		struct sg_table sgt;
+	} table;
 };
 
 /*
@@ -136,7 +146,7 @@
 		     fpc_en:1;
 	struct omap3isp_ccdc_blcomp blcomp;
 	struct omap3isp_ccdc_bclamp clamp;
-	struct omap3isp_ccdc_fpc fpc;
+	struct ispccdc_fpc fpc;
 	struct ispccdc_lsc lsc;
 	unsigned int update;
 	unsigned int shadow_update;
diff --git a/drivers/media/platform/omap3isp/ispccp2.c b/drivers/media/platform/omap3isp/ispccp2.c
index b30b67d..f3801db 100644
--- a/drivers/media/platform/omap3isp/ispccp2.c
+++ b/drivers/media/platform/omap3isp/ispccp2.c
@@ -549,7 +549,7 @@
 
 	buffer = omap3isp_video_buffer_next(&ccp2->video_in);
 	if (buffer != NULL)
-		ccp2_set_inaddr(ccp2, buffer->isp_addr);
+		ccp2_set_inaddr(ccp2, buffer->dma);
 
 	pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 
@@ -940,7 +940,7 @@
 {
 	struct isp_ccp2_device *ccp2 = &video->isp->isp_ccp2;
 
-	ccp2_set_inaddr(ccp2, buffer->isp_addr);
+	ccp2_set_inaddr(ccp2, buffer->dma);
 	return 0;
 }
 
diff --git a/drivers/media/platform/omap3isp/ispcsi2.c b/drivers/media/platform/omap3isp/ispcsi2.c
index 6205608..5a2e47e 100644
--- a/drivers/media/platform/omap3isp/ispcsi2.c
+++ b/drivers/media/platform/omap3isp/ispcsi2.c
@@ -695,7 +695,7 @@
 	if (buffer == NULL)
 		return;
 
-	csi2_set_outaddr(csi2, buffer->isp_addr);
+	csi2_set_outaddr(csi2, buffer->dma);
 	csi2_ctx_enable(isp, csi2, 0, 1);
 }
 
@@ -812,7 +812,7 @@
 	struct isp_device *isp = video->isp;
 	struct isp_csi2_device *csi2 = &isp->isp_csi2a;
 
-	csi2_set_outaddr(csi2, buffer->isp_addr);
+	csi2_set_outaddr(csi2, buffer->dma);
 
 	/*
 	 * If streaming was enabled before there was a buffer queued
diff --git a/drivers/media/platform/omap3isp/isph3a_aewb.c b/drivers/media/platform/omap3isp/isph3a_aewb.c
index 75fd82b..d6811ce 100644
--- a/drivers/media/platform/omap3isp/isph3a_aewb.c
+++ b/drivers/media/platform/omap3isp/isph3a_aewb.c
@@ -47,7 +47,7 @@
 	if (aewb->state == ISPSTAT_DISABLED)
 		return;
 
-	isp_reg_writel(aewb->isp, aewb->active_buf->iommu_addr,
+	isp_reg_writel(aewb->isp, aewb->active_buf->dma_addr,
 		       OMAP3_ISP_IOMEM_H3A, ISPH3A_AEWBUFST);
 
 	if (!aewb->update)
diff --git a/drivers/media/platform/omap3isp/isph3a_af.c b/drivers/media/platform/omap3isp/isph3a_af.c
index a0bf5af..6fc960c 100644
--- a/drivers/media/platform/omap3isp/isph3a_af.c
+++ b/drivers/media/platform/omap3isp/isph3a_af.c
@@ -51,7 +51,7 @@
 	if (af->state == ISPSTAT_DISABLED)
 		return;
 
-	isp_reg_writel(af->isp, af->active_buf->iommu_addr, OMAP3_ISP_IOMEM_H3A,
+	isp_reg_writel(af->isp, af->active_buf->dma_addr, OMAP3_ISP_IOMEM_H3A,
 		       ISPH3A_AFBUFST);
 
 	if (!af->update)
diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c
index 395b2b0..720809b 100644
--- a/drivers/media/platform/omap3isp/isppreview.c
+++ b/drivers/media/platform/omap3isp/isppreview.c
@@ -1499,14 +1499,14 @@
 	if (prev->input == PREVIEW_INPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&prev->video_in);
 		if (buffer != NULL)
-			preview_set_inaddr(prev, buffer->isp_addr);
+			preview_set_inaddr(prev, buffer->dma);
 		pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 	}
 
 	if (prev->output & PREVIEW_OUTPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&prev->video_out);
 		if (buffer != NULL) {
-			preview_set_outaddr(prev, buffer->isp_addr);
+			preview_set_outaddr(prev, buffer->dma);
 			restart = 1;
 		}
 		pipe->state |= ISP_PIPELINE_IDLE_OUTPUT;
@@ -1577,10 +1577,10 @@
 	struct isp_prev_device *prev = &video->isp->isp_prev;
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-		preview_set_inaddr(prev, buffer->isp_addr);
+		preview_set_inaddr(prev, buffer->dma);
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		preview_set_outaddr(prev, buffer->isp_addr);
+		preview_set_outaddr(prev, buffer->dma);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/ispqueue.c b/drivers/media/platform/omap3isp/ispqueue.c
deleted file mode 100644
index a5e6585..0000000
--- a/drivers/media/platform/omap3isp/ispqueue.c
+++ /dev/null
@@ -1,1161 +0,0 @@
-/*
- * ispqueue.c
- *
- * TI OMAP3 ISP - Video buffers queue handling
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *	     Sakari Ailus <sakari.ailus@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#include <asm/cacheflush.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/poll.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ispqueue.h"
-
-/* -----------------------------------------------------------------------------
- * Video buffers management
- */
-
-/*
- * isp_video_buffer_cache_sync - Keep the buffers coherent between CPU and ISP
- *
- * The typical operation required here is cache invalidation across
- * the (user space) buffer address range, and this _must_ be done
- * at QBUF stage (and *only* at QBUF).
- *
- * We try to use the optimal cache invalidation function:
- * - dmac_map_area:
- *    - used when the number of pages is _low_.
- *    - it becomes quite slow as the number of pages increases.
- *       - for a 648x492 viewfinder (150 pages) it takes 1.3 ms.
- *       - for a 5 Mpix buffer (2491 pages) it takes 25-50 ms.
- *
- * - flush_cache_all:
- *    - used when the number of pages is _high_.
- *    - time taken is in the range of 500-900 us.
- *    - has a higher penalty, as the whole dcache + icache is invalidated.
- */
-/*
- * FIXME: dmac_inv_range crashes randomly on the user space buffer
- *        address. Fall back to flush_cache_all for now.
- */
-#define ISP_CACHE_FLUSH_PAGES_MAX       0
-
-static void isp_video_buffer_cache_sync(struct isp_video_buffer *buf)
-{
-	if (buf->skip_cache)
-		return;
-
-	if (buf->vbuf.m.userptr == 0 || buf->npages == 0 ||
-	    buf->npages > ISP_CACHE_FLUSH_PAGES_MAX)
-		flush_cache_all();
-	else {
-		dmac_map_area((void *)buf->vbuf.m.userptr, buf->vbuf.length,
-			      DMA_FROM_DEVICE);
-		outer_inv_range(buf->vbuf.m.userptr,
-				buf->vbuf.m.userptr + buf->vbuf.length);
-	}
-}
-
-/*
- * isp_video_buffer_lock_vma - Prevent VMAs from being unmapped
- *
- * Lock the VMAs underlying the given buffer into memory. This prevents the
- * userspace buffer mapping from being swapped out, making VIPT cache handling
- * easier.
- *
- * Note that the pages will not be freed, as the buffers have been pinned into
- * memory by a call to get_user_pages(), but the userspace mapping could
- * still disappear if the VMAs are not locked. This is caused by the memory
- * management code trying to be as lock-less as possible, which under some
- * conditions results in the userspace mapping manager not finding out that
- * the pages are locked.
- */
-static int isp_video_buffer_lock_vma(struct isp_video_buffer *buf, int lock)
-{
-	struct vm_area_struct *vma;
-	unsigned long start;
-	unsigned long end;
-	int ret = 0;
-
-	if (buf->vbuf.memory == V4L2_MEMORY_MMAP)
-		return 0;
-
-	/* We can be called from workqueue context, when the current task dies,
-	 * to unlock the VMAs. In that case there's no current memory management
-	 * context so unlocking can't be performed, but the VMAs have been or
-	 * are getting destroyed anyway so it doesn't really matter.
-	 */
-	if (!current || !current->mm)
-		return lock ? -EINVAL : 0;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	down_write(&current->mm->mmap_sem);
-	spin_lock(&current->mm->page_table_lock);
-
-	do {
-		vma = find_vma(current->mm, start);
-		if (vma == NULL) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		if (lock)
-			vma->vm_flags |= VM_LOCKED;
-		else
-			vma->vm_flags &= ~VM_LOCKED;
-
-		start = vma->vm_end + 1;
-	} while (vma->vm_end < end);
-
-	if (lock)
-		buf->vm_flags |= VM_LOCKED;
-	else
-		buf->vm_flags &= ~VM_LOCKED;
-
-out:
-	spin_unlock(&current->mm->page_table_lock);
-	up_write(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_sglist_kernel - Build a scatter list for a vmalloc'ed buffer
- *
- * Iterate over the vmalloc'ed area and create a scatter list entry for every
- * page.
- */
-static int isp_video_buffer_sglist_kernel(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int npages;
-	unsigned int i;
-	void *addr;
-
-	addr = buf->vaddr;
-	npages = PAGE_ALIGN(buf->vbuf.length) >> PAGE_SHIFT;
-
-	sglist = vmalloc(npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, npages);
-
-	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
-		struct page *page = vmalloc_to_page(addr);
-
-		if (page == NULL || PageHighMem(page)) {
-			vfree(sglist);
-			return -EINVAL;
-		}
-
-		sg_set_page(&sglist[i], page, PAGE_SIZE, 0);
-	}
-
-	buf->sglen = npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_sglist_user - Build a scatter list for a userspace buffer
- *
- * Walk the buffer pages list and create a 1:1 mapping to a scatter list.
- */
-static int isp_video_buffer_sglist_user(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int offset = buf->offset;
-	unsigned int i;
-
-	sglist = vmalloc(buf->npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, buf->npages);
-
-	for (i = 0; i < buf->npages; ++i) {
-		if (PageHighMem(buf->pages[i])) {
-			vfree(sglist);
-			return -EINVAL;
-		}
-
-		sg_set_page(&sglist[i], buf->pages[i], PAGE_SIZE - offset,
-			    offset);
-		offset = 0;
-	}
-
-	buf->sglen = buf->npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_sglist_pfnmap - Build a scatter list for a VM_PFNMAP buffer
- *
- * Create a scatter list of physically contiguous pages starting at the buffer
- * memory physical address.
- */
-static int isp_video_buffer_sglist_pfnmap(struct isp_video_buffer *buf)
-{
-	struct scatterlist *sglist;
-	unsigned int offset = buf->offset;
-	unsigned long pfn = buf->paddr >> PAGE_SHIFT;
-	unsigned int i;
-
-	sglist = vmalloc(buf->npages * sizeof(*sglist));
-	if (sglist == NULL)
-		return -ENOMEM;
-
-	sg_init_table(sglist, buf->npages);
-
-	for (i = 0; i < buf->npages; ++i, ++pfn) {
-		sg_set_page(&sglist[i], pfn_to_page(pfn), PAGE_SIZE - offset,
-			    offset);
-		/* PFNMAP buffers will not get DMA-mapped, set the DMA address
-		 * manually.
-		 */
-		sg_dma_address(&sglist[i]) = (pfn << PAGE_SHIFT) + offset;
-		offset = 0;
-	}
-
-	buf->sglen = buf->npages;
-	buf->sglist = sglist;
-
-	return 0;
-}
-
-/*
- * isp_video_buffer_cleanup - Release pages for a userspace VMA.
- *
- * Release pages locked by a call to isp_video_buffer_prepare_user and free the
- * pages table.
- */
-static void isp_video_buffer_cleanup(struct isp_video_buffer *buf)
-{
-	enum dma_data_direction direction;
-	unsigned int i;
-
-	if (buf->queue->ops->buffer_cleanup)
-		buf->queue->ops->buffer_cleanup(buf);
-
-	if (!(buf->vm_flags & VM_PFNMAP)) {
-		direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE
-			  ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		dma_unmap_sg(buf->queue->dev, buf->sglist, buf->sglen,
-			     direction);
-	}
-
-	vfree(buf->sglist);
-	buf->sglist = NULL;
-	buf->sglen = 0;
-
-	if (buf->pages != NULL) {
-		isp_video_buffer_lock_vma(buf, 0);
-
-		for (i = 0; i < buf->npages; ++i)
-			page_cache_release(buf->pages[i]);
-
-		vfree(buf->pages);
-		buf->pages = NULL;
-	}
-
-	buf->npages = 0;
-	buf->skip_cache = false;
-}
-
-/*
- * isp_video_buffer_prepare_user - Pin userspace VMA pages to memory.
- *
- * This function creates a list of pages for a userspace VMA. The number of
- * pages is first computed based on the buffer size, and pages are then
- * retrieved by a call to get_user_pages.
- *
- * Pages are pinned in memory by get_user_pages(), making them available for
- * DMA transfers. However, due to memory management optimizations,
- * get_user_pages() doesn't seem to guarantee that the pinned pages will not
- * be written to swap and removed from the userspace mapping(s). When this
- * happens, a page fault can be generated when accessing those unmapped pages.
- *
- * If the fault is triggered by a page table walk caused by VIPT cache
- * management operations, the page fault handler might oops if the MM semaphore
- * is held, as it can't handle kernel page faults in that case. To fix that, a
- * fixup entry needs to be added to the cache management code, or the userspace
- * VMA must be locked to avoid removing pages from the userspace mapping in the
- * first place.
- *
- * If the number of pages retrieved is smaller than the number required by the
- * buffer size, the function returns -EFAULT.
- */
-static int isp_video_buffer_prepare_user(struct isp_video_buffer *buf)
-{
-	unsigned long data;
-	unsigned int first;
-	unsigned int last;
-	int ret;
-
-	data = buf->vbuf.m.userptr;
-	first = (data & PAGE_MASK) >> PAGE_SHIFT;
-	last = ((data + buf->vbuf.length - 1) & PAGE_MASK) >> PAGE_SHIFT;
-
-	buf->offset = data & ~PAGE_MASK;
-	buf->npages = last - first + 1;
-	buf->pages = vmalloc(buf->npages * sizeof(buf->pages[0]));
-	if (buf->pages == NULL)
-		return -ENOMEM;
-
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm, data & PAGE_MASK,
-			     buf->npages,
-			     buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE, 0,
-			     buf->pages, NULL);
-	up_read(&current->mm->mmap_sem);
-
-	if (ret != buf->npages) {
-		buf->npages = ret < 0 ? 0 : ret;
-		isp_video_buffer_cleanup(buf);
-		return -EFAULT;
-	}
-
-	ret = isp_video_buffer_lock_vma(buf, 1);
-	if (ret < 0)
-		isp_video_buffer_cleanup(buf);
-
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare_pfnmap - Validate a VM_PFNMAP userspace buffer
- *
- * Userspace VM_PFNMAP buffers are supported only if they are contiguous in
- * memory and if they span a single VMA.
- *
- * Return 0 if the buffer is valid, or -EFAULT otherwise.
- */
-static int isp_video_buffer_prepare_pfnmap(struct isp_video_buffer *buf)
-{
-	struct vm_area_struct *vma;
-	unsigned long prev_pfn;
-	unsigned long this_pfn;
-	unsigned long start;
-	unsigned long end;
-	dma_addr_t pa = 0;
-	int ret = -EFAULT;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	buf->offset = start & ~PAGE_MASK;
-	buf->npages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
-	buf->pages = NULL;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, start);
-	if (vma == NULL || vma->vm_end < end)
-		goto done;
-
-	for (prev_pfn = 0; start <= end; start += PAGE_SIZE) {
-		ret = follow_pfn(vma, start, &this_pfn);
-		if (ret)
-			goto done;
-
-		if (prev_pfn == 0)
-			pa = this_pfn << PAGE_SHIFT;
-		else if (this_pfn != prev_pfn + 1) {
-			ret = -EFAULT;
-			goto done;
-		}
-
-		prev_pfn = this_pfn;
-	}
-
-	buf->paddr = pa + buf->offset;
-	ret = 0;
-
-done:
-	up_read(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare_vm_flags - Get VMA flags for a userspace address
- *
- * This function locates the VMAs for the buffer's userspace address and checks
- * that their flags match. The only flag we need to care about at the moment
- * is VM_PFNMAP.
- *
- * The buffer vm_flags field is set to the first VMA flags.
- *
- * Return -EFAULT if no VMA can be found for part of the buffer, or if the VMAs
- * have incompatible flags.
- */
-static int isp_video_buffer_prepare_vm_flags(struct isp_video_buffer *buf)
-{
-	struct vm_area_struct *vma;
-	pgprot_t uninitialized_var(vm_page_prot);
-	unsigned long start;
-	unsigned long end;
-	int ret = -EFAULT;
-
-	start = buf->vbuf.m.userptr;
-	end = buf->vbuf.m.userptr + buf->vbuf.length - 1;
-
-	down_read(&current->mm->mmap_sem);
-
-	do {
-		vma = find_vma(current->mm, start);
-		if (vma == NULL)
-			goto done;
-
-		if (start == buf->vbuf.m.userptr) {
-			buf->vm_flags = vma->vm_flags;
-			vm_page_prot = vma->vm_page_prot;
-		}
-
-		if ((buf->vm_flags ^ vma->vm_flags) & VM_PFNMAP)
-			goto done;
-
-		if (vm_page_prot != vma->vm_page_prot)
-			goto done;
-
-		start = vma->vm_end + 1;
-	} while (vma->vm_end < end);
-
-	/* Skip cache management to enhance performance for non-cached or
-	 * write-combining buffers.
-	 */
-	if (vm_page_prot == pgprot_noncached(vm_page_prot) ||
-	    vm_page_prot == pgprot_writecombine(vm_page_prot))
-		buf->skip_cache = true;
-
-	ret = 0;
-
-done:
-	up_read(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * isp_video_buffer_prepare - Make a buffer ready for operation
- *
- * Preparing a buffer involves:
- *
- * - validating VMAs (userspace buffers only)
- * - locking pages and VMAs into memory (userspace buffers only)
- * - building page and scatter-gather lists
- * - mapping buffers for DMA operation
- * - performing driver-specific preparation
- *
- * The function must be called in userspace context with a valid mm context
- * (this excludes cleanup paths such as sys_close when the userspace process
- * segfaults).
- */
-static int isp_video_buffer_prepare(struct isp_video_buffer *buf)
-{
-	enum dma_data_direction direction;
-	int ret;
-
-	switch (buf->vbuf.memory) {
-	case V4L2_MEMORY_MMAP:
-		ret = isp_video_buffer_sglist_kernel(buf);
-		break;
-
-	case V4L2_MEMORY_USERPTR:
-		ret = isp_video_buffer_prepare_vm_flags(buf);
-		if (ret < 0)
-			return ret;
-
-		if (buf->vm_flags & VM_PFNMAP) {
-			ret = isp_video_buffer_prepare_pfnmap(buf);
-			if (ret < 0)
-				return ret;
-
-			ret = isp_video_buffer_sglist_pfnmap(buf);
-		} else {
-			ret = isp_video_buffer_prepare_user(buf);
-			if (ret < 0)
-				return ret;
-
-			ret = isp_video_buffer_sglist_user(buf);
-		}
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	if (ret < 0)
-		goto done;
-
-	if (!(buf->vm_flags & VM_PFNMAP)) {
-		direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE
-			  ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		ret = dma_map_sg(buf->queue->dev, buf->sglist, buf->sglen,
-				 direction);
-		if (ret != buf->sglen) {
-			ret = -EFAULT;
-			goto done;
-		}
-	}
-
-	if (buf->queue->ops->buffer_prepare)
-		ret = buf->queue->ops->buffer_prepare(buf);
-
-done:
-	if (ret < 0) {
-		isp_video_buffer_cleanup(buf);
-		return ret;
-	}
-
-	return ret;
-}
-
-/*
- * isp_video_buffer_query - Query the status of a given buffer
- *
- * Locking: must be called with the queue lock held.
- */
-static void isp_video_buffer_query(struct isp_video_buffer *buf,
-				   struct v4l2_buffer *vbuf)
-{
-	memcpy(vbuf, &buf->vbuf, sizeof(*vbuf));
-
-	if (buf->vma_use_count)
-		vbuf->flags |= V4L2_BUF_FLAG_MAPPED;
-
-	switch (buf->state) {
-	case ISP_BUF_STATE_ERROR:
-		vbuf->flags |= V4L2_BUF_FLAG_ERROR;
-		/* Fallthrough */
-	case ISP_BUF_STATE_DONE:
-		vbuf->flags |= V4L2_BUF_FLAG_DONE;
-		break;
-	case ISP_BUF_STATE_QUEUED:
-	case ISP_BUF_STATE_ACTIVE:
-		vbuf->flags |= V4L2_BUF_FLAG_QUEUED;
-		break;
-	case ISP_BUF_STATE_IDLE:
-	default:
-		break;
-	}
-}
-
-/*
- * isp_video_buffer_wait - Wait for a buffer to be ready
- *
- * In non-blocking mode, return immediately with 0 if the buffer is ready or
- * -EAGAIN if the buffer is in the QUEUED or ACTIVE state.
- *
- * In blocking mode, wait (interruptibly but with no timeout) on the buffer wait
- * queue using the same condition.
- */
-static int isp_video_buffer_wait(struct isp_video_buffer *buf, int nonblocking)
-{
-	if (nonblocking) {
-		return (buf->state != ISP_BUF_STATE_QUEUED &&
-			buf->state != ISP_BUF_STATE_ACTIVE)
-			? 0 : -EAGAIN;
-	}
-
-	return wait_event_interruptible(buf->wait,
-		buf->state != ISP_BUF_STATE_QUEUED &&
-		buf->state != ISP_BUF_STATE_ACTIVE);
-}
-
-/* -----------------------------------------------------------------------------
- * Queue management
- */
-
-/*
- * isp_video_queue_free - Free video buffers memory
- *
- * Buffers can only be freed if the queue isn't streaming and if no buffer is
- * mapped to userspace. Return -EBUSY if those conditions aren't satisfied.
- *
- * This function must be called with the queue lock held.
- */
-static int isp_video_queue_free(struct isp_video_queue *queue)
-{
-	unsigned int i;
-
-	if (queue->streaming)
-		return -EBUSY;
-
-	for (i = 0; i < queue->count; ++i) {
-		if (queue->buffers[i]->vma_use_count != 0)
-			return -EBUSY;
-	}
-
-	for (i = 0; i < queue->count; ++i) {
-		struct isp_video_buffer *buf = queue->buffers[i];
-
-		isp_video_buffer_cleanup(buf);
-
-		vfree(buf->vaddr);
-		buf->vaddr = NULL;
-
-		kfree(buf);
-		queue->buffers[i] = NULL;
-	}
-
-	INIT_LIST_HEAD(&queue->queue);
-	queue->count = 0;
-	return 0;
-}
-
-/*
- * isp_video_queue_alloc - Allocate video buffers memory
- *
- * This function must be called with the queue lock held.
- */
-static int isp_video_queue_alloc(struct isp_video_queue *queue,
-				 unsigned int nbuffers,
-				 unsigned int size, enum v4l2_memory memory)
-{
-	struct isp_video_buffer *buf;
-	unsigned int i;
-	void *mem;
-	int ret;
-
-	/* Start by freeing the buffers. */
-	ret = isp_video_queue_free(queue);
-	if (ret < 0)
-		return ret;
-
-	/* Bail out if no buffers should be allocated. */
-	if (nbuffers == 0)
-		return 0;
-
-	/* Initialize the allocated buffers. */
-	for (i = 0; i < nbuffers; ++i) {
-		buf = kzalloc(queue->bufsize, GFP_KERNEL);
-		if (buf == NULL)
-			break;
-
-		if (memory == V4L2_MEMORY_MMAP) {
-			/* Allocate video buffers memory for mmap mode. Align
-			 * the size to the page size.
-			 */
-			mem = vmalloc_32_user(PAGE_ALIGN(size));
-			if (mem == NULL) {
-				kfree(buf);
-				break;
-			}
-
-			buf->vbuf.m.offset = i * PAGE_ALIGN(size);
-			buf->vaddr = mem;
-		}
-
-		buf->vbuf.index = i;
-		buf->vbuf.length = size;
-		buf->vbuf.type = queue->type;
-		buf->vbuf.flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-		buf->vbuf.field = V4L2_FIELD_NONE;
-		buf->vbuf.memory = memory;
-
-		buf->queue = queue;
-		init_waitqueue_head(&buf->wait);
-
-		queue->buffers[i] = buf;
-	}
-
-	if (i == 0)
-		return -ENOMEM;
-
-	queue->count = i;
-	return nbuffers;
-}
-
-/**
- * omap3isp_video_queue_cleanup - Clean up the video buffers queue
- * @queue: Video buffers queue
- *
- * Free all allocated resources and clean up the video buffers queue. The queue
- * must not be busy (no ongoing video stream) and buffers must have been
- * unmapped.
- *
- * Return 0 on success or -EBUSY if the queue is busy or buffers haven't been
- * unmapped.
- */
-int omap3isp_video_queue_cleanup(struct isp_video_queue *queue)
-{
-	return isp_video_queue_free(queue);
-}
-
-/**
- * omap3isp_video_queue_init - Initialize the video buffers queue
- * @queue: Video buffers queue
- * @type: V4L2 buffer type (capture or output)
- * @ops: Driver-specific queue operations
- * @dev: Device used for DMA operations
- * @bufsize: Size of the driver-specific buffer structure
- *
- * Initialize the video buffers queue with the supplied parameters.
- *
- * The queue type must be one of V4L2_BUF_TYPE_VIDEO_CAPTURE or
- * V4L2_BUF_TYPE_VIDEO_OUTPUT. Other buffer types are not supported yet.
- *
- * Buffer objects will be allocated using the given buffer size to allow room
- * for driver-specific fields. Driver-specific buffer structures must start
- * with a struct isp_video_buffer field. Drivers with no driver-specific buffer
- * structure must pass the size of the isp_video_buffer structure in the bufsize
- * parameter.
- *
- * Return 0 on success.
- */
-int omap3isp_video_queue_init(struct isp_video_queue *queue,
-			      enum v4l2_buf_type type,
-			      const struct isp_video_queue_operations *ops,
-			      struct device *dev, unsigned int bufsize)
-{
-	INIT_LIST_HEAD(&queue->queue);
-	mutex_init(&queue->lock);
-	spin_lock_init(&queue->irqlock);
-
-	queue->type = type;
-	queue->ops = ops;
-	queue->dev = dev;
-	queue->bufsize = bufsize;
-
-	return 0;
-}
-
-/* -----------------------------------------------------------------------------
- * V4L2 operations
- */
-
-/**
- * omap3isp_video_queue_reqbufs - Allocate video buffers memory
- *
- * This function is intended to be used as a VIDIOC_REQBUFS ioctl handler. It
- * allocates video buffer objects and, for MMAP buffers, buffer memory.
- *
- * If the number of buffers is 0, all buffers are freed and the function returns
- * without performing any allocation.
- *
- * If the number of buffers is not 0, currently allocated buffers (if any) are
- * freed and the requested number of buffers is allocated. Depending on
- * driver-specific requirements and on memory availability, a number of buffers
- * smaller or larger than requested can be allocated. This isn't considered
- * an error.
- *
- * Return 0 on success or one of the following error codes:
- *
- * -EINVAL if the buffer type or index are invalid
- * -EBUSY if the queue is busy (streaming or buffers mapped)
- * -ENOMEM if the buffers can't be allocated due to an out-of-memory condition
- */
-int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue,
-				 struct v4l2_requestbuffers *rb)
-{
-	unsigned int nbuffers = rb->count;
-	unsigned int size;
-	int ret;
-
-	if (rb->type != queue->type)
-		return -EINVAL;
-
-	queue->ops->queue_prepare(queue, &nbuffers, &size);
-	if (size == 0)
-		return -EINVAL;
-
-	nbuffers = min_t(unsigned int, nbuffers, ISP_VIDEO_MAX_BUFFERS);
-
-	mutex_lock(&queue->lock);
-
-	ret = isp_video_queue_alloc(queue, nbuffers, size, rb->memory);
-	if (ret < 0)
-		goto done;
-
-	rb->count = ret;
-	ret = 0;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_querybuf - Query the status of a buffer in a queue
- *
- * This function is intended to be used as a VIDIOC_QUERYBUF ioctl handler. It
- * returns the status of a given video buffer.
- *
- * Return 0 on success or -EINVAL if the buffer type or index are invalid.
- */
-int omap3isp_video_queue_querybuf(struct isp_video_queue *queue,
-				  struct v4l2_buffer *vbuf)
-{
-	struct isp_video_buffer *buf;
-	int ret = 0;
-
-	if (vbuf->type != queue->type)
-		return -EINVAL;
-
-	mutex_lock(&queue->lock);
-
-	if (vbuf->index >= queue->count) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	buf = queue->buffers[vbuf->index];
-	isp_video_buffer_query(buf, vbuf);
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_qbuf - Queue a buffer
- *
- * This function is intended to be used as a VIDIOC_QBUF ioctl handler.
- *
- * The v4l2_buffer structure passed from userspace is first sanity tested. If
- * sane, the buffer is then processed and added to the main queue and, if the
- * queue is streaming, to the IRQ queue.
- *
- * Before being enqueued, USERPTR buffers are checked for address changes. If
- * the buffer has a different userspace address, the old memory area is unlocked
- * and the new memory area is locked.
- */
-int omap3isp_video_queue_qbuf(struct isp_video_queue *queue,
-			      struct v4l2_buffer *vbuf)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-	int ret = -EINVAL;
-
-	if (vbuf->type != queue->type)
-		goto done;
-
-	mutex_lock(&queue->lock);
-
-	if (vbuf->index >= queue->count)
-		goto done;
-
-	buf = queue->buffers[vbuf->index];
-
-	if (vbuf->memory != buf->vbuf.memory)
-		goto done;
-
-	if (buf->state != ISP_BUF_STATE_IDLE)
-		goto done;
-
-	if (vbuf->memory == V4L2_MEMORY_USERPTR &&
-	    vbuf->length < buf->vbuf.length)
-		goto done;
-
-	if (vbuf->memory == V4L2_MEMORY_USERPTR &&
-	    vbuf->m.userptr != buf->vbuf.m.userptr) {
-		isp_video_buffer_cleanup(buf);
-		buf->vbuf.m.userptr = vbuf->m.userptr;
-		buf->prepared = 0;
-	}
-
-	if (!buf->prepared) {
-		ret = isp_video_buffer_prepare(buf);
-		if (ret < 0)
-			goto done;
-		buf->prepared = 1;
-	}
-
-	isp_video_buffer_cache_sync(buf);
-
-	buf->state = ISP_BUF_STATE_QUEUED;
-	list_add_tail(&buf->stream, &queue->queue);
-
-	if (queue->streaming) {
-		spin_lock_irqsave(&queue->irqlock, flags);
-		queue->ops->buffer_queue(buf);
-		spin_unlock_irqrestore(&queue->irqlock, flags);
-	}
-
-	ret = 0;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_dqbuf - Dequeue a buffer
- *
- * This function is intended to be used as a VIDIOC_DQBUF ioctl handler.
- *
- * Wait until a buffer is ready to be dequeued, remove it from the queue and
- * copy its information to the v4l2_buffer structure.
- *
- * If the nonblocking argument is not zero and no buffer is ready, return
- * -EAGAIN immediately instead of waiting.
- *
- * If no buffer has been enqueued, or if the requested buffer type doesn't match
- * the queue type, return -EINVAL.
- */
-int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue,
-			       struct v4l2_buffer *vbuf, int nonblocking)
-{
-	struct isp_video_buffer *buf;
-	int ret;
-
-	if (vbuf->type != queue->type)
-		return -EINVAL;
-
-	mutex_lock(&queue->lock);
-
-	if (list_empty(&queue->queue)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream);
-	ret = isp_video_buffer_wait(buf, nonblocking);
-	if (ret < 0)
-		goto done;
-
-	list_del(&buf->stream);
-
-	isp_video_buffer_query(buf, vbuf);
-	buf->state = ISP_BUF_STATE_IDLE;
-	vbuf->flags &= ~V4L2_BUF_FLAG_QUEUED;
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_streamon - Start streaming
- *
- * This function is intended to be used as a VIDIOC_STREAMON ioctl handler. It
- * starts streaming on the queue and calls the buffer_queue operation for all
- * queued buffers.
- *
- * Return 0 on success.
- */
-int omap3isp_video_queue_streamon(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-
-	mutex_lock(&queue->lock);
-
-	if (queue->streaming)
-		goto done;
-
-	queue->streaming = 1;
-
-	spin_lock_irqsave(&queue->irqlock, flags);
-	list_for_each_entry(buf, &queue->queue, stream)
-		queue->ops->buffer_queue(buf);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
-
-done:
-	mutex_unlock(&queue->lock);
-	return 0;
-}
-
-/**
- * omap3isp_video_queue_streamoff - Stop streaming
- *
- * This function is intended to be used as a VIDIOC_STREAMOFF ioctl handler. It
- * stops streaming on the queue and wakes up all the buffers.
- *
- * Drivers must stop the hardware and synchronize with interrupt handlers and/or
- * delayed works before calling this function to make sure no buffer will be
- * touched by the driver and/or hardware.
- */
-void omap3isp_video_queue_streamoff(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned long flags;
-	unsigned int i;
-
-	mutex_lock(&queue->lock);
-
-	if (!queue->streaming)
-		goto done;
-
-	queue->streaming = 0;
-
-	spin_lock_irqsave(&queue->irqlock, flags);
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-
-		if (buf->state == ISP_BUF_STATE_ACTIVE)
-			wake_up(&buf->wait);
-
-		buf->state = ISP_BUF_STATE_IDLE;
-	}
-	spin_unlock_irqrestore(&queue->irqlock, flags);
-
-	INIT_LIST_HEAD(&queue->queue);
-
-done:
-	mutex_unlock(&queue->lock);
-}
-
-/**
- * omap3isp_video_queue_discard_done - Discard all buffers marked as DONE
- *
- * This function is intended to be used with suspend/resume operations. It
- * discards all 'done' buffers as they would be too old to be requested after
- * resume.
- *
- * Drivers must stop the hardware and synchronize with interrupt handlers and/or
- * delayed works before calling this function to make sure no buffer will be
- * touched by the driver and/or hardware.
- */
-void omap3isp_video_queue_discard_done(struct isp_video_queue *queue)
-{
-	struct isp_video_buffer *buf;
-	unsigned int i;
-
-	mutex_lock(&queue->lock);
-
-	if (!queue->streaming)
-		goto done;
-
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-
-		if (buf->state == ISP_BUF_STATE_DONE)
-			buf->state = ISP_BUF_STATE_ERROR;
-	}
-
-done:
-	mutex_unlock(&queue->lock);
-}
-
-static void isp_video_queue_vm_open(struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *buf = vma->vm_private_data;
-
-	buf->vma_use_count++;
-}
-
-static void isp_video_queue_vm_close(struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *buf = vma->vm_private_data;
-
-	buf->vma_use_count--;
-}
-
-static const struct vm_operations_struct isp_video_queue_vm_ops = {
-	.open = isp_video_queue_vm_open,
-	.close = isp_video_queue_vm_close,
-};
-
-/**
- * omap3isp_video_queue_mmap - Map buffers to userspace
- *
- * This function is intended to be used as an mmap() file operation handler. It
- * maps a buffer to userspace based on the VMA offset.
- *
- * Only buffers of memory type MMAP are supported.
- */
-int omap3isp_video_queue_mmap(struct isp_video_queue *queue,
-			 struct vm_area_struct *vma)
-{
-	struct isp_video_buffer *uninitialized_var(buf);
-	unsigned long size;
-	unsigned int i;
-	int ret = 0;
-
-	mutex_lock(&queue->lock);
-
-	for (i = 0; i < queue->count; ++i) {
-		buf = queue->buffers[i];
-		if ((buf->vbuf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff)
-			break;
-	}
-
-	if (i == queue->count) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	size = vma->vm_end - vma->vm_start;
-
-	if (buf->vbuf.memory != V4L2_MEMORY_MMAP ||
-	    size != PAGE_ALIGN(buf->vbuf.length)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	ret = remap_vmalloc_range(vma, buf->vaddr, 0);
-	if (ret < 0)
-		goto done;
-
-	vma->vm_ops = &isp_video_queue_vm_ops;
-	vma->vm_private_data = buf;
-	isp_video_queue_vm_open(vma);
-
-done:
-	mutex_unlock(&queue->lock);
-	return ret;
-}
-
-/**
- * omap3isp_video_queue_poll - Poll video queue state
- *
- * This function is intended to be used as a poll() file operation handler. It
- * polls the state of the video buffer at the front of the queue and returns an
- * events mask.
- *
- * If no buffer is present at the front of the queue, POLLERR is returned.
- */
-unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue,
-				       struct file *file, poll_table *wait)
-{
-	struct isp_video_buffer *buf;
-	unsigned int mask = 0;
-
-	mutex_lock(&queue->lock);
-	if (list_empty(&queue->queue)) {
-		mask |= POLLERR;
-		goto done;
-	}
-	buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream);
-
-	poll_wait(file, &buf->wait, wait);
-	if (buf->state == ISP_BUF_STATE_DONE ||
-	    buf->state == ISP_BUF_STATE_ERROR) {
-		if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-			mask |= POLLIN | POLLRDNORM;
-		else
-			mask |= POLLOUT | POLLWRNORM;
-	}
-
-done:
-	mutex_unlock(&queue->lock);
-	return mask;
-}
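Deleting ispqueue.c removes the driver's home-grown buffer queue — USERPTR page pinning, VMA locking, manual cache maintenance and mmap handling — all of which the videobuf2 framework provides generically. The replacement queue setup lives in ispvideo.c, outside this section, but presumably follows the usual videobuf2-dma-contig shape (a hedged sketch; the queue field and ops name are assumptions, error handling omitted):

	#include <media/videobuf2-core.h>
	#include <media/videobuf2-dma-contig.h>

	struct vb2_queue *queue = &video->queue;	/* assumed field */
	int ret;

	queue->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
	queue->io_modes = VB2_MMAP | VB2_USERPTR;
	queue->drv_priv = video;
	queue->ops = &isp_video_queue_ops;		/* driver callbacks */
	queue->mem_ops = &vb2_dma_contig_memops;
	queue->buf_struct_size = sizeof(struct isp_buffer);
	ret = vb2_queue_init(queue);

	/* In the buf_queue callback, the DMA address used throughout this
	 * patch comes straight from the framework: */
	buffer->dma = vb2_dma_contig_plane_dma_addr(vb, 0);
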
diff --git a/drivers/media/platform/omap3isp/ispqueue.h b/drivers/media/platform/omap3isp/ispqueue.h
deleted file mode 100644
index 3e048ad..0000000
--- a/drivers/media/platform/omap3isp/ispqueue.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * ispqueue.h
- *
- * TI OMAP3 ISP - Video buffers queue handling
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *	     Sakari Ailus <sakari.ailus@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#ifndef OMAP3_ISP_QUEUE_H
-#define OMAP3_ISP_QUEUE_H
-
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mm_types.h>
-#include <linux/mutex.h>
-#include <linux/videodev2.h>
-#include <linux/wait.h>
-
-struct isp_video_queue;
-struct page;
-struct scatterlist;
-
-#define ISP_VIDEO_MAX_BUFFERS		16
-
-/**
- * enum isp_video_buffer_state - ISP video buffer state
- * @ISP_BUF_STATE_IDLE:	The buffer is under userspace control (dequeued
- *	or not queued yet).
- * @ISP_BUF_STATE_QUEUED: The buffer has been queued but isn't used by the
- *	device yet.
- * @ISP_BUF_STATE_ACTIVE: The buffer is in use for an active video transfer.
- * @ISP_BUF_STATE_ERROR: The device is done with the buffer and an error
- *	occurred. For capture devices the buffer likely contains corrupted data or
- *	no data at all.
- * @ISP_BUF_STATE_DONE: The device is done with the buffer and no error occurred.
- *	For capture devices the buffer contains valid data.
- */
-enum isp_video_buffer_state {
-	ISP_BUF_STATE_IDLE,
-	ISP_BUF_STATE_QUEUED,
-	ISP_BUF_STATE_ACTIVE,
-	ISP_BUF_STATE_ERROR,
-	ISP_BUF_STATE_DONE,
-};
-
-/**
- * struct isp_video_buffer - ISP video buffer
- * @vma_use_count: Number of times the buffer is mmap'ed to userspace
- * @stream: List head for insertion into main queue
- * @queue: ISP buffers queue this buffer belongs to
- * @prepared: Whether the buffer has been prepared
- * @skip_cache: Whether to skip cache management operations for this buffer
- * @vaddr: Memory virtual address (for kernel buffers)
- * @vm_flags: Buffer VMA flags (for userspace buffers)
- * @offset: Offset inside the first page (for userspace buffers)
- * @npages: Number of pages (for userspace buffers)
- * @pages: Pages table (for userspace non-VM_PFNMAP buffers)
- * @paddr: Memory physical address (for userspace VM_PFNMAP buffers)
- * @sglen: Number of elements in the scatter list (for non-VM_PFNMAP buffers)
- * @sglist: Scatter list (for non-VM_PFNMAP buffers)
- * @vbuf: V4L2 buffer
- * @irqlist: List head for insertion into IRQ queue
- * @state: Current buffer state
- * @wait: Wait queue to signal buffer completion
- */
-struct isp_video_buffer {
-	unsigned long vma_use_count;
-	struct list_head stream;
-	struct isp_video_queue *queue;
-	unsigned int prepared:1;
-	bool skip_cache;
-
-	/* For kernel buffers. */
-	void *vaddr;
-
-	/* For userspace buffers. */
-	vm_flags_t vm_flags;
-	unsigned long offset;
-	unsigned int npages;
-	struct page **pages;
-	dma_addr_t paddr;
-
-	/* For all buffers except VM_PFNMAP. */
-	unsigned int sglen;
-	struct scatterlist *sglist;
-
-	/* Touched by the interrupt handler. */
-	struct v4l2_buffer vbuf;
-	struct list_head irqlist;
-	enum isp_video_buffer_state state;
-	wait_queue_head_t wait;
-};
-
-#define to_isp_video_buffer(vb)	container_of(vb, struct isp_video_buffer, vb)
-
-/**
- * struct isp_video_queue_operations - Driver-specific operations
- * @queue_prepare: Called before allocating buffers. Drivers should clamp the
- *	number of buffers according to their requirements, and must return the
- *	buffer size in bytes.
- * @buffer_prepare: Called the first time a buffer is queued, or after changing
- *	the userspace memory address for a USERPTR buffer, with the queue lock
- *	held. Drivers should perform device-specific buffer preparation (such as
- *	mapping the buffer memory in an IOMMU). This operation is optional.
- * @buffer_queue: Called when a buffer is being added to the queue with the
- *	queue irqlock spinlock held.
- * @buffer_cleanup: Called before freeing buffers, or before changing the
- *	userspace memory address for a USERPTR buffer, with the queue lock held.
- *	Drivers must perform cleanup operations required to undo the
- *	buffer_prepare call. This operation is optional.
- */
-struct isp_video_queue_operations {
-	void (*queue_prepare)(struct isp_video_queue *queue,
-			      unsigned int *nbuffers, unsigned int *size);
-	int  (*buffer_prepare)(struct isp_video_buffer *buf);
-	void (*buffer_queue)(struct isp_video_buffer *buf);
-	void (*buffer_cleanup)(struct isp_video_buffer *buf);
-};
-
-/**
- * struct isp_video_queue - ISP video buffers queue
- * @type: Type of video buffers handled by this queue
- * @ops: Queue operations
- * @dev: Device used for DMA operations
- * @bufsize: Size of a driver-specific buffer object
- * @count: Number of currently allocated buffers
- * @buffers: ISP video buffers
- * @lock: Mutex to protect access to the buffers, main queue and state
- * @irqlock: Spinlock to protect access to the IRQ queue
- * @streaming: Queue state, indicates whether the queue is streaming
- * @queue: List of all queued buffers
- */
-struct isp_video_queue {
-	enum v4l2_buf_type type;
-	const struct isp_video_queue_operations *ops;
-	struct device *dev;
-	unsigned int bufsize;
-
-	unsigned int count;
-	struct isp_video_buffer *buffers[ISP_VIDEO_MAX_BUFFERS];
-	struct mutex lock;
-	spinlock_t irqlock;
-
-	unsigned int streaming:1;
-
-	struct list_head queue;
-};
-
-int omap3isp_video_queue_cleanup(struct isp_video_queue *queue);
-int omap3isp_video_queue_init(struct isp_video_queue *queue,
-			      enum v4l2_buf_type type,
-			      const struct isp_video_queue_operations *ops,
-			      struct device *dev, unsigned int bufsize);
-
-int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue,
-				 struct v4l2_requestbuffers *rb);
-int omap3isp_video_queue_querybuf(struct isp_video_queue *queue,
-				  struct v4l2_buffer *vbuf);
-int omap3isp_video_queue_qbuf(struct isp_video_queue *queue,
-			      struct v4l2_buffer *vbuf);
-int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue,
-			       struct v4l2_buffer *vbuf, int nonblocking);
-int omap3isp_video_queue_streamon(struct isp_video_queue *queue);
-void omap3isp_video_queue_streamoff(struct isp_video_queue *queue);
-void omap3isp_video_queue_discard_done(struct isp_video_queue *queue);
-int omap3isp_video_queue_mmap(struct isp_video_queue *queue,
-			      struct vm_area_struct *vma);
-unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue,
-				       struct file *file, poll_table *wait);
-
-#endif /* OMAP3_ISP_QUEUE_H */
diff --git a/drivers/media/platform/omap3isp/ispresizer.c b/drivers/media/platform/omap3isp/ispresizer.c
index 86369df..6f077c2 100644
--- a/drivers/media/platform/omap3isp/ispresizer.c
+++ b/drivers/media/platform/omap3isp/ispresizer.c
@@ -1040,7 +1040,7 @@
 	 */
 	buffer = omap3isp_video_buffer_next(&res->video_out);
 	if (buffer != NULL) {
-		resizer_set_outaddr(res, buffer->isp_addr);
+		resizer_set_outaddr(res, buffer->dma);
 		restart = 1;
 	}
 
@@ -1049,7 +1049,7 @@
 	if (res->input == RESIZER_INPUT_MEMORY) {
 		buffer = omap3isp_video_buffer_next(&res->video_in);
 		if (buffer != NULL)
-			resizer_set_inaddr(res, buffer->isp_addr);
+			resizer_set_inaddr(res, buffer->dma);
 		pipe->state |= ISP_PIPELINE_IDLE_INPUT;
 	}
 
@@ -1101,7 +1101,7 @@
 	struct isp_res_device *res = &video->isp->isp_res;
 
 	if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-		resizer_set_inaddr(res, buffer->isp_addr);
+		resizer_set_inaddr(res, buffer->dma);
 
 	/*
 	 * We now have a buffer queued on the output. Despite what the
@@ -1116,7 +1116,7 @@
 	 * continuous mode or when starting the stream.
 	 */
 	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		resizer_set_outaddr(res, buffer->isp_addr);
+		resizer_set_outaddr(res, buffer->dma);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5707f85..e6cbc1e 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -26,13 +26,12 @@
  */
 
 #include <linux/dma-mapping.h>
-#include <linux/omap-iommu.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
 #include "isp.h"
 
-#define IS_COHERENT_BUF(stat)	((stat)->dma_ch >= 0)
+#define ISP_STAT_USES_DMAENGINE(stat)	((stat)->dma_ch >= 0)
 
 /*
  * MAGIC_SIZE must always be the greatest common divisor of
@@ -77,21 +76,10 @@
 					dma_addr_t, unsigned long, size_t,
 					enum dma_data_direction))
 {
-	struct device *dev = stat->isp->dev;
-	struct page *pg;
-	dma_addr_t dma_addr;
-	u32 offset;
-
-	/* Initial magic words */
-	pg = vmalloc_to_page(buf->virt_addr);
-	dma_addr = pfn_to_dma(dev, page_to_pfn(pg));
-	dma_sync(dev, dma_addr, 0, MAGIC_SIZE, dir);
-
-	/* Final magic words */
-	pg = vmalloc_to_page(buf->virt_addr + buf_size);
-	dma_addr = pfn_to_dma(dev, page_to_pfn(pg));
-	offset = ((u32)buf->virt_addr + buf_size) & ~PAGE_MASK;
-	dma_sync(dev, dma_addr, offset, MAGIC_SIZE, dir);
+	/* Sync the initial and final magic words. */
+	dma_sync(stat->isp->dev, buf->dma_addr, 0, MAGIC_SIZE, dir);
+	dma_sync(stat->isp->dev, buf->dma_addr + (buf_size & PAGE_MASK),
+		 buf_size & ~PAGE_MASK, MAGIC_SIZE, dir);
 }
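With the statistics buffers now backed by a single coherent allocation, both magic-word regions are addressable from buf->dma_addr directly: the final magic words start at byte offset buf_size, which the code splits into a page-aligned DMA base (buf_size & PAGE_MASK) and an in-page offset (buf_size & ~PAGE_MASK). A worked example, assuming 4 KiB pages:

	/* buf_size = 0x12a34, PAGE_MASK = ~0xfffUL:
	 *   buf_size & PAGE_MASK  = 0x12000  (added to buf->dma_addr as base)
	 *   buf_size & ~PAGE_MASK = 0x00a34  (offset within that page)
	 * so the second dma_sync() covers dma_addr + 0x12a34 for MAGIC_SIZE
	 * bytes, i.e. exactly the final magic words. */
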
 
 static void isp_stat_buf_sync_magic_for_device(struct ispstat *stat,
@@ -99,7 +87,7 @@
 					       u32 buf_size,
 					       enum dma_data_direction dir)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
 	__isp_stat_buf_sync_magic(stat, buf, buf_size, dir,
@@ -111,7 +99,7 @@
 					    u32 buf_size,
 					    enum dma_data_direction dir)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
 	__isp_stat_buf_sync_magic(stat, buf, buf_size, dir,
@@ -180,21 +168,21 @@
 static void isp_stat_buf_sync_for_device(struct ispstat *stat,
 					 struct ispstat_buffer *buf)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
-	dma_sync_sg_for_device(stat->isp->dev, buf->iovm->sgt->sgl,
-			       buf->iovm->sgt->nents, DMA_FROM_DEVICE);
+	dma_sync_sg_for_device(stat->isp->dev, buf->sgt.sgl,
+			       buf->sgt.nents, DMA_FROM_DEVICE);
 }
 
 static void isp_stat_buf_sync_for_cpu(struct ispstat *stat,
 				      struct ispstat_buffer *buf)
 {
-	if (IS_COHERENT_BUF(stat))
+	if (ISP_STAT_USES_DMAENGINE(stat))
 		return;
 
-	dma_sync_sg_for_cpu(stat->isp->dev, buf->iovm->sgt->sgl,
-			    buf->iovm->sgt->nents, DMA_FROM_DEVICE);
+	dma_sync_sg_for_cpu(stat->isp->dev, buf->sgt.sgl,
+			    buf->sgt.nents, DMA_FROM_DEVICE);
 }
 
 static void isp_stat_buf_clear(struct ispstat *stat)
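For the non-dmaengine case, the sg_table kept in struct ispstat_buffer feeds ordinary streaming-DMA cache maintenance. A condensed kernel-style sketch of the bracket the two helpers above implement around a transfer; stat_buf_dma_cycle() is illustrative, the dma_sync_sg_* calls are the real API:

#include <linux/dma-mapping.h>

static void stat_buf_dma_cycle(struct device *dev, struct sg_table *sgt)
{
	/* Hand the buffer to the device: write back CPU-held cache lines. */
	dma_sync_sg_for_device(dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);

	/* ... the DMA engine writes statistics into the buffer here ... */

	/* Reclaim it for the CPU: discard now-stale cache lines. */
	dma_sync_sg_for_cpu(dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
}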
@@ -354,29 +342,21 @@
 
 static void isp_stat_bufs_free(struct ispstat *stat)
 {
-	struct isp_device *isp = stat->isp;
-	int i;
+	struct device *dev = ISP_STAT_USES_DMAENGINE(stat)
+			   ? NULL : stat->isp->dev;
+	unsigned int i;
 
 	for (i = 0; i < STAT_MAX_BUFS; i++) {
 		struct ispstat_buffer *buf = &stat->buf[i];
 
-		if (!IS_COHERENT_BUF(stat)) {
-			if (IS_ERR_OR_NULL((void *)buf->iommu_addr))
-				continue;
-			if (buf->iovm)
-				dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl,
-					     buf->iovm->sgt->nents,
-					     DMA_FROM_DEVICE);
-			omap_iommu_vfree(isp->domain, isp->dev,
-							buf->iommu_addr);
-		} else {
-			if (!buf->virt_addr)
-				continue;
-			dma_free_coherent(stat->isp->dev, stat->buf_alloc_size,
-					  buf->virt_addr, buf->dma_addr);
-		}
-		buf->iommu_addr = 0;
-		buf->iovm = NULL;
+		if (!buf->virt_addr)
+			continue;
+
+		sg_free_table(&buf->sgt);
+
+		dma_free_coherent(dev, stat->buf_alloc_size, buf->virt_addr,
+				  buf->dma_addr);
+
 		buf->dma_addr = 0;
 		buf->virt_addr = NULL;
 		buf->empty = 1;
@@ -389,83 +369,51 @@
 	stat->active_buf = NULL;
 }
 
-static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
+static int isp_stat_bufs_alloc_one(struct device *dev,
+				   struct ispstat_buffer *buf,
+				   unsigned int size)
 {
-	struct isp_device *isp = stat->isp;
-	int i;
+	int ret;
 
-	stat->buf_alloc_size = size;
+	buf->virt_addr = dma_alloc_coherent(dev, size, &buf->dma_addr,
+					    GFP_KERNEL | GFP_DMA);
+	if (!buf->virt_addr)
+		return -ENOMEM;
 
-	for (i = 0; i < STAT_MAX_BUFS; i++) {
-		struct ispstat_buffer *buf = &stat->buf[i];
-		struct iovm_struct *iovm;
-
-		WARN_ON(buf->dma_addr);
-		buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
-							size, IOMMU_FLAG);
-		if (IS_ERR((void *)buf->iommu_addr)) {
-			dev_err(stat->isp->dev,
-				 "%s: Can't acquire memory for "
-				 "buffer %d\n", stat->subdev.name, i);
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-
-		iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr);
-		if (!iovm ||
-		    !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents,
-				DMA_FROM_DEVICE)) {
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-		buf->iovm = iovm;
-
-		buf->virt_addr = omap_da_to_va(stat->isp->dev,
-					  (u32)buf->iommu_addr);
-		buf->empty = 1;
-		dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
-			"iommu_addr=0x%08lx virt_addr=0x%08lx",
-			stat->subdev.name, i, buf->iommu_addr,
-			(unsigned long)buf->virt_addr);
+	ret = dma_get_sgtable(dev, &buf->sgt, buf->virt_addr, buf->dma_addr,
+			      size);
+	if (ret < 0) {
+		dma_free_coherent(dev, size, buf->virt_addr, buf->dma_addr);
+		buf->virt_addr = NULL;
+		buf->dma_addr = 0;
+		return ret;
 	}
 
 	return 0;
 }
 
-static int isp_stat_bufs_alloc_dma(struct ispstat *stat, unsigned int size)
-{
-	int i;
-
-	stat->buf_alloc_size = size;
-
-	for (i = 0; i < STAT_MAX_BUFS; i++) {
-		struct ispstat_buffer *buf = &stat->buf[i];
-
-		WARN_ON(buf->iommu_addr);
-		buf->virt_addr = dma_alloc_coherent(stat->isp->dev, size,
-					&buf->dma_addr, GFP_KERNEL | GFP_DMA);
-
-		if (!buf->virt_addr || !buf->dma_addr) {
-			dev_info(stat->isp->dev,
-				 "%s: Can't acquire memory for "
-				 "DMA buffer %d\n", stat->subdev.name, i);
-			isp_stat_bufs_free(stat);
-			return -ENOMEM;
-		}
-		buf->empty = 1;
-
-		dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
-			"dma_addr=0x%08lx virt_addr=0x%08lx\n",
-			stat->subdev.name, i, (unsigned long)buf->dma_addr,
-			(unsigned long)buf->virt_addr);
-	}
-
-	return 0;
-}
-
+/*
+ * The device passed to the DMA API depends on whether the statistics block uses
+ * ISP DMA, external DMA or PIO to transfer data.
+ *
+ * The first case (for the AEWB and AF engines) passes the ISP device, resulting
+ * in the DMA buffers being mapped through the ISP IOMMU.
+ *
+ * The second case (for the histogram engine) should pass the DMA engine device.
+ * As that device isn't accessible through the OMAP DMA engine API, the driver
+ * passes NULL instead, resulting in the buffers being mapped directly as
+ * physical pages.
+ *
+ * The third case (for the histogram engine) doesn't require any mapping. The
+ * buffers could be allocated with kmalloc/vmalloc, but we still use
+ * dma_alloc_coherent() for consistency purposes.
+ */
 static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size)
 {
+	struct device *dev = ISP_STAT_USES_DMAENGINE(stat)
+			   ? NULL : stat->isp->dev;
 	unsigned long flags;
+	unsigned int i;
 
 	spin_lock_irqsave(&stat->isp->stat_lock, flags);
 
@@ -489,10 +437,31 @@
 
 	isp_stat_bufs_free(stat);
 
-	if (IS_COHERENT_BUF(stat))
-		return isp_stat_bufs_alloc_dma(stat, size);
-	else
-		return isp_stat_bufs_alloc_iommu(stat, size);
+	stat->buf_alloc_size = size;
+
+	for (i = 0; i < STAT_MAX_BUFS; i++) {
+		struct ispstat_buffer *buf = &stat->buf[i];
+		int ret;
+
+		ret = isp_stat_bufs_alloc_one(dev, buf, size);
+		if (ret < 0) {
+			dev_err(stat->isp->dev,
+				"%s: Failed to allocate DMA buffer %u\n",
+				stat->subdev.name, i);
+			isp_stat_bufs_free(stat);
+			return ret;
+		}
+
+		buf->empty = 1;
+
+		dev_dbg(stat->isp->dev,
+			"%s: buffer[%u] allocated. dma=0x%08lx virt=0x%08lx",
+			stat->subdev.name, i,
+			(unsigned long)buf->dma_addr,
+			(unsigned long)buf->virt_addr);
+	}
+
+	return 0;
 }
 
 static void isp_stat_queue_event(struct ispstat *stat, int err)
diff --git a/drivers/media/platform/omap3isp/ispstat.h b/drivers/media/platform/omap3isp/ispstat.h
index 9a047c9..58d6ac7 100644
--- a/drivers/media/platform/omap3isp/ispstat.h
+++ b/drivers/media/platform/omap3isp/ispstat.h
@@ -46,8 +46,7 @@
 struct ispstat;
 
 struct ispstat_buffer {
-	unsigned long iommu_addr;
-	struct iovm_struct *iovm;
+	struct sg_table sgt;
 	void *virt_addr;
 	dma_addr_t dma_addr;
 	struct timespec ts;
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index 85b4036..e36bac2 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -27,7 +27,6 @@
 #include <linux/clk.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/omap-iommu.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
@@ -35,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <media/v4l2-dev.h>
 #include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
 
 #include "ispvideo.h"
 #include "isp.h"
@@ -326,90 +326,36 @@
 }
 
 /* -----------------------------------------------------------------------------
- * IOMMU management
- */
-
-#define IOMMU_FLAG	(IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8)
-
-/*
- * ispmmu_vmap - Wrapper for Virtual memory mapping of a scatter gather list
- * @isp: Device pointer specific to the OMAP3 ISP.
- * @sglist: Pointer to source Scatter gather list to allocate.
- * @sglen: Number of elements of the scatter-gatter list.
- *
- * Returns a resulting mapped device address by the ISP MMU, or -ENOMEM if
- * we ran out of memory.
- */
-static dma_addr_t
-ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen)
-{
-	struct sg_table *sgt;
-	u32 da;
-
-	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
-	if (sgt == NULL)
-		return -ENOMEM;
-
-	sgt->sgl = (struct scatterlist *)sglist;
-	sgt->nents = sglen;
-	sgt->orig_nents = sglen;
-
-	da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG);
-	if (IS_ERR_VALUE(da))
-		kfree(sgt);
-
-	return da;
-}
-
-/*
- * ispmmu_vunmap - Unmap a device address from the ISP MMU
- * @isp: Device pointer specific to the OMAP3 ISP.
- * @da: Device address generated from a ispmmu_vmap call.
- */
-static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da)
-{
-	struct sg_table *sgt;
-
-	sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da);
-	kfree(sgt);
-}
-
-/* -----------------------------------------------------------------------------
  * Video queue operations
  */
 
-static void isp_video_queue_prepare(struct isp_video_queue *queue,
-				    unsigned int *nbuffers, unsigned int *size)
+static int isp_video_queue_setup(struct vb2_queue *queue,
+				 const struct v4l2_format *fmt,
+				 unsigned int *count, unsigned int *num_planes,
+				 unsigned int sizes[], void *alloc_ctxs[])
 {
-	struct isp_video_fh *vfh =
-		container_of(queue, struct isp_video_fh, queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(queue);
 	struct isp_video *video = vfh->video;
 
-	*size = vfh->format.fmt.pix.sizeimage;
-	if (*size == 0)
-		return;
+	*num_planes = 1;
 
-	*nbuffers = min(*nbuffers, video->capture_mem / PAGE_ALIGN(*size));
+	sizes[0] = vfh->format.fmt.pix.sizeimage;
+	if (sizes[0] == 0)
+		return -EINVAL;
+
+	alloc_ctxs[0] = video->alloc_ctx;
+
+	*count = min(*count, video->capture_mem / PAGE_ALIGN(sizes[0]));
+
+	return 0;
 }
 
-static void isp_video_buffer_cleanup(struct isp_video_buffer *buf)
+static int isp_video_buffer_prepare(struct vb2_buffer *buf)
 {
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue);
 	struct isp_buffer *buffer = to_isp_buffer(buf);
 	struct isp_video *video = vfh->video;
-
-	if (buffer->isp_addr) {
-		ispmmu_vunmap(video->isp, buffer->isp_addr);
-		buffer->isp_addr = 0;
-	}
-}
-
-static int isp_video_buffer_prepare(struct isp_video_buffer *buf)
-{
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
-	struct isp_buffer *buffer = to_isp_buffer(buf);
-	struct isp_video *video = vfh->video;
-	unsigned long addr;
+	dma_addr_t addr;
 
 	/* Refuse to prepare the buffer if the video node has registered an
 	 * error. We don't need to take any lock here as the operation is
@@ -420,19 +366,16 @@
 	if (unlikely(video->error))
 		return -EIO;
 
-	addr = ispmmu_vmap(video->isp, buf->sglist, buf->sglen);
-	if (IS_ERR_VALUE(addr))
-		return -EIO;
-
+	addr = vb2_dma_contig_plane_dma_addr(buf, 0);
 	if (!IS_ALIGNED(addr, 32)) {
-		dev_dbg(video->isp->dev, "Buffer address must be "
-			"aligned to 32 bytes boundary.\n");
-		ispmmu_vunmap(video->isp, buffer->isp_addr);
+		dev_dbg(video->isp->dev,
+			"Buffer address must be aligned to a 32-byte boundary.\n");
 		return -EINVAL;
 	}
 
-	buf->vbuf.bytesused = vfh->format.fmt.pix.sizeimage;
-	buffer->isp_addr = addr;
+	vb2_set_plane_payload(&buffer->vb, 0, vfh->format.fmt.pix.sizeimage);
+	buffer->dma = addr;
+
 	return 0;
 }
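The prepare hook above no longer maps anything itself; videobuf2's dma-contig allocator has already done that, so the driver only fetches the address and checks its alignment. A tiny standalone illustration of the IS_ALIGNED() test (the macro below mirrors the kernel's definition for power-of-two alignments):

#include <assert.h>
#include <stdint.h>

#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
	uint32_t ok = 0x80000040;	/* multiple of 32 */
	uint32_t bad = 0x80000044;	/* only 4-byte aligned */

	assert(IS_ALIGNED(ok, 32));
	assert(!IS_ALIGNED(bad, 32));
	return 0;
}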
 
@@ -445,9 +388,9 @@
  * If the pipeline is busy, it will be restarted in the output module interrupt
  * handler.
  */
-static void isp_video_buffer_queue(struct isp_video_buffer *buf)
+static void isp_video_buffer_queue(struct vb2_buffer *buf)
 {
-	struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue);
+	struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue);
 	struct isp_buffer *buffer = to_isp_buffer(buf);
 	struct isp_video *video = vfh->video;
 	struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity);
@@ -456,14 +399,18 @@
 	unsigned int empty;
 	unsigned int start;
 
+	spin_lock_irqsave(&video->irqlock, flags);
+
 	if (unlikely(video->error)) {
-		buf->state = ISP_BUF_STATE_ERROR;
-		wake_up(&buf->wait);
+		vb2_buffer_done(&buffer->vb, VB2_BUF_STATE_ERROR);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		return;
 	}
 
 	empty = list_empty(&video->dmaqueue);
-	list_add_tail(&buffer->buffer.irqlist, &video->dmaqueue);
+	list_add_tail(&buffer->irqlist, &video->dmaqueue);
+
+	spin_unlock_irqrestore(&video->irqlock, flags);
 
 	if (empty) {
 		if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
@@ -487,23 +434,22 @@
 	}
 }
 
-static const struct isp_video_queue_operations isp_video_queue_ops = {
-	.queue_prepare = &isp_video_queue_prepare,
-	.buffer_prepare = &isp_video_buffer_prepare,
-	.buffer_queue = &isp_video_buffer_queue,
-	.buffer_cleanup = &isp_video_buffer_cleanup,
+static const struct vb2_ops isp_video_queue_ops = {
+	.queue_setup = isp_video_queue_setup,
+	.buf_prepare = isp_video_buffer_prepare,
+	.buf_queue = isp_video_buffer_queue,
 };
 
 /*
  * omap3isp_video_buffer_next - Complete the current buffer and return the next
  * @video: ISP video object
  *
- * Remove the current video buffer from the DMA queue and fill its timestamp,
- * field count and state fields before waking up its completion handler.
+ * Remove the current video buffer from the DMA queue and fill its timestamp and
+ * field count before handing it back to videobuf2.
  *
- * For capture video nodes the buffer state is set to ISP_BUF_STATE_DONE if no
- * error has been flagged in the pipeline, or to ISP_BUF_STATE_ERROR otherwise.
- * For video output nodes the buffer state is always set to ISP_BUF_STATE_DONE.
+ * For capture video nodes the buffer state is set to VB2_BUF_STATE_DONE if no
+ * error has been flagged in the pipeline, or to VB2_BUF_STATE_ERROR otherwise.
+ * For video output nodes the buffer state is always set to VB2_BUF_STATE_DONE.
  *
  * The DMA queue is expected to contain at least one buffer.
  *
@@ -513,26 +459,25 @@
 struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video)
 {
 	struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity);
-	struct isp_video_queue *queue = video->queue;
 	enum isp_pipeline_state state;
-	struct isp_video_buffer *buf;
+	struct isp_buffer *buf;
 	unsigned long flags;
 	struct timespec ts;
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irqsave(&video->irqlock, flags);
 	if (WARN_ON(list_empty(&video->dmaqueue))) {
-		spin_unlock_irqrestore(&queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 		return NULL;
 	}
 
-	buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer,
+	buf = list_first_entry(&video->dmaqueue, struct isp_buffer,
 			       irqlist);
 	list_del(&buf->irqlist);
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irqrestore(&video->irqlock, flags);
 
 	ktime_get_ts(&ts);
-	buf->vbuf.timestamp.tv_sec = ts.tv_sec;
-	buf->vbuf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+	buf->vb.v4l2_buf.timestamp.tv_sec = ts.tv_sec;
+	buf->vb.v4l2_buf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
 
 	/* Do frame number propagation only if this is the output video node.
 	 * Frame number either comes from the CSI receivers or it gets
@@ -541,22 +486,27 @@
 	 * first, so the input number might lag behind by 1 in some cases.
 	 */
 	if (video == pipe->output && !pipe->do_propagation)
-		buf->vbuf.sequence = atomic_inc_return(&pipe->frame_number);
+		buf->vb.v4l2_buf.sequence =
+			atomic_inc_return(&pipe->frame_number);
 	else
-		buf->vbuf.sequence = atomic_read(&pipe->frame_number);
+		buf->vb.v4l2_buf.sequence = atomic_read(&pipe->frame_number);
 
 	/* Report pipeline errors to userspace on the capture device side. */
-	if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) {
-		buf->state = ISP_BUF_STATE_ERROR;
+	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) {
+		state = VB2_BUF_STATE_ERROR;
 		pipe->error = false;
 	} else {
-		buf->state = ISP_BUF_STATE_DONE;
+		state = VB2_BUF_STATE_DONE;
 	}
 
-	wake_up(&buf->wait);
+	vb2_buffer_done(&buf->vb, state);
+
+	spin_lock_irqsave(&video->irqlock, flags);
 
 	if (list_empty(&video->dmaqueue)) {
-		if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		spin_unlock_irqrestore(&video->irqlock, flags);
+
+		if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
 			state = ISP_PIPELINE_QUEUE_OUTPUT
 			      | ISP_PIPELINE_STREAM;
 		else
@@ -571,16 +521,19 @@
 		return NULL;
 	}
 
-	if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) {
-		spin_lock_irqsave(&pipe->lock, flags);
+	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) {
+		spin_lock(&pipe->lock);
 		pipe->state &= ~ISP_PIPELINE_STREAM;
-		spin_unlock_irqrestore(&pipe->lock, flags);
+		spin_unlock(&pipe->lock);
 	}
 
-	buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer,
+	buf = list_first_entry(&video->dmaqueue, struct isp_buffer,
 			       irqlist);
-	buf->state = ISP_BUF_STATE_ACTIVE;
-	return to_isp_buffer(buf);
+	buf->vb.state = VB2_BUF_STATE_ACTIVE;
+
+	spin_unlock_irqrestore(&video->irqlock, flags);
+
+	return buf;
 }
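The queue and completion paths above both serialize on the new per-video irqlock. A condensed sketch (not the driver's exact code) of the producer/consumer pattern they implement, reusing the driver's isp_video and isp_buffer types:

#include <linux/list.h>
#include <linux/spinlock.h>

static void dmaqueue_push(struct isp_video *video, struct isp_buffer *buf)
{
	unsigned long flags;

	spin_lock_irqsave(&video->irqlock, flags);
	list_add_tail(&buf->irqlist, &video->dmaqueue);
	spin_unlock_irqrestore(&video->irqlock, flags);
}

static struct isp_buffer *dmaqueue_pop(struct isp_video *video)
{
	struct isp_buffer *buf = NULL;
	unsigned long flags;

	spin_lock_irqsave(&video->irqlock, flags);
	if (!list_empty(&video->dmaqueue)) {
		buf = list_first_entry(&video->dmaqueue,
				       struct isp_buffer, irqlist);
		list_del(&buf->irqlist);
	}
	spin_unlock_irqrestore(&video->irqlock, flags);

	return buf;
}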
 
 /*
@@ -592,25 +545,22 @@
  */
 void omap3isp_video_cancel_stream(struct isp_video *video)
 {
-	struct isp_video_queue *queue = video->queue;
 	unsigned long flags;
 
-	spin_lock_irqsave(&queue->irqlock, flags);
+	spin_lock_irqsave(&video->irqlock, flags);
 
 	while (!list_empty(&video->dmaqueue)) {
-		struct isp_video_buffer *buf;
+		struct isp_buffer *buf;
 
 		buf = list_first_entry(&video->dmaqueue,
-				       struct isp_video_buffer, irqlist);
+				       struct isp_buffer, irqlist);
 		list_del(&buf->irqlist);
-
-		buf->state = ISP_BUF_STATE_ERROR;
-		wake_up(&buf->wait);
+		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
 	}
 
 	video->error = true;
 
-	spin_unlock_irqrestore(&queue->irqlock, flags);
+	spin_unlock_irqrestore(&video->irqlock, flags);
 }
 
 /*
@@ -627,12 +577,15 @@
 {
 	struct isp_buffer *buf = NULL;
 
-	if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		omap3isp_video_queue_discard_done(video->queue);
+	if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+		mutex_lock(&video->queue_lock);
+		vb2_discard_done(video->queue);
+		mutex_unlock(&video->queue_lock);
+	}
 
 	if (!list_empty(&video->dmaqueue)) {
 		buf = list_first_entry(&video->dmaqueue,
-				       struct isp_buffer, buffer.irqlist);
+				       struct isp_buffer, irqlist);
 		video->ops->queue(video, buf);
 		video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_QUEUED;
 	} else {
@@ -840,33 +793,56 @@
 isp_video_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers *rb)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_reqbufs(&vfh->queue, rb);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_reqbufs(&vfh->queue, rb);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_querybuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_querybuf(&vfh->queue, b);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_querybuf(&vfh->queue, b);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_qbuf(&vfh->queue, b);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_qbuf(&vfh->queue, b);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int
 isp_video_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(fh);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_dqbuf(&vfh->queue, b,
-					  file->f_flags & O_NONBLOCK);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_dqbuf(&vfh->queue, b, file->f_flags & O_NONBLOCK);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int isp_video_check_external_subdevs(struct isp_video *video,
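For context, the userspace sequence these four handlers service is the standard V4L2 streaming loop; a minimal sketch with error handling elided (/dev/video0 and the buffer count are illustrative):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

int main(void)
{
	int fd = open("/dev/video0", O_RDWR);
	enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
	struct v4l2_requestbuffers rb;
	struct v4l2_buffer b;

	memset(&rb, 0, sizeof(rb));
	rb.count = 4;
	rb.type = type;
	rb.memory = V4L2_MEMORY_MMAP;
	ioctl(fd, VIDIOC_REQBUFS, &rb);		/* -> isp_video_reqbufs() */

	memset(&b, 0, sizeof(b));
	b.type = type;
	b.memory = V4L2_MEMORY_MMAP;
	b.index = 0;
	ioctl(fd, VIDIOC_QBUF, &b);		/* -> isp_video_qbuf() */
	ioctl(fd, VIDIOC_STREAMON, &type);
	ioctl(fd, VIDIOC_DQBUF, &b);		/* -> isp_video_dqbuf(), blocks */
	return 0;
}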
@@ -1006,11 +982,6 @@
 
 	mutex_lock(&video->stream_lock);
 
-	if (video->streaming) {
-		mutex_unlock(&video->stream_lock);
-		return -EBUSY;
-	}
-
 	/* Start streaming on the pipeline. No link touching an entity in the
 	 * pipeline can be activated or deactivated once streaming is started.
 	 */
@@ -1069,7 +1040,9 @@
 	INIT_LIST_HEAD(&video->dmaqueue);
 	atomic_set(&pipe->frame_number, -1);
 
-	ret = omap3isp_video_queue_streamon(&vfh->queue);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_streamon(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 	if (ret < 0)
 		goto err_check_format;
 
@@ -1082,19 +1055,19 @@
 					      ISP_PIPELINE_STREAM_CONTINUOUS);
 		if (ret < 0)
 			goto err_set_stream;
-		spin_lock_irqsave(&video->queue->irqlock, flags);
+		spin_lock_irqsave(&video->irqlock, flags);
 		if (list_empty(&video->dmaqueue))
 			video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_UNDERRUN;
-		spin_unlock_irqrestore(&video->queue->irqlock, flags);
+		spin_unlock_irqrestore(&video->irqlock, flags);
 	}
 
-	video->streaming = 1;
-
 	mutex_unlock(&video->stream_lock);
 	return 0;
 
 err_set_stream:
-	omap3isp_video_queue_streamoff(&vfh->queue);
+	mutex_lock(&video->queue_lock);
+	vb2_streamoff(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 err_check_format:
 	media_entity_pipeline_stop(&video->video.entity);
 err_pipeline_start:
@@ -1130,9 +1103,9 @@
 	mutex_lock(&video->stream_lock);
 
 	/* Make sure we're not streaming yet. */
-	mutex_lock(&vfh->queue.lock);
-	streaming = vfh->queue.streaming;
-	mutex_unlock(&vfh->queue.lock);
+	mutex_lock(&video->queue_lock);
+	streaming = vb2_is_streaming(&vfh->queue);
+	mutex_unlock(&video->queue_lock);
 
 	if (!streaming)
 		goto done;
@@ -1151,9 +1124,12 @@
 
 	/* Stop the stream. */
 	omap3isp_pipeline_set_stream(pipe, ISP_PIPELINE_STREAM_STOPPED);
-	omap3isp_video_queue_streamoff(&vfh->queue);
+	omap3isp_video_cancel_stream(video);
+
+	mutex_lock(&video->queue_lock);
+	vb2_streamoff(&vfh->queue, type);
+	mutex_unlock(&video->queue_lock);
 	video->queue = NULL;
-	video->streaming = 0;
 	video->error = false;
 
 	if (video->isp->pdata->set_constraints)
@@ -1223,6 +1199,7 @@
 {
 	struct isp_video *video = video_drvdata(file);
 	struct isp_video_fh *handle;
+	struct vb2_queue *queue;
 	int ret = 0;
 
 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
@@ -1244,9 +1221,20 @@
 		goto done;
 	}
 
-	omap3isp_video_queue_init(&handle->queue, video->type,
-				  &isp_video_queue_ops, video->isp->dev,
-				  sizeof(struct isp_buffer));
+	queue = &handle->queue;
+	queue->type = video->type;
+	queue->io_modes = VB2_MMAP | VB2_USERPTR;
+	queue->drv_priv = handle;
+	queue->ops = &isp_video_queue_ops;
+	queue->mem_ops = &vb2_dma_contig_memops;
+	queue->buf_struct_size = sizeof(struct isp_buffer);
+	queue->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+
+	ret = vb2_queue_init(&handle->queue);
+	if (ret < 0) {
+		omap3isp_put(video->isp);
+		goto done;
+	}
 
 	memset(&handle->format, 0, sizeof(handle->format));
 	handle->format.type = video->type;
@@ -1273,9 +1261,9 @@
 	/* Disable streaming and free the buffer queue resources. */
 	isp_video_streamoff(file, vfh, video->type);
 
-	mutex_lock(&handle->queue.lock);
-	omap3isp_video_queue_cleanup(&handle->queue);
-	mutex_unlock(&handle->queue.lock);
+	mutex_lock(&video->queue_lock);
+	vb2_queue_release(&handle->queue);
+	mutex_unlock(&video->queue_lock);
 
 	omap3isp_pipeline_pm_use(&video->video.entity, 0);
 
@@ -1292,16 +1280,27 @@
 static unsigned int isp_video_poll(struct file *file, poll_table *wait)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(file->private_data);
-	struct isp_video_queue *queue = &vfh->queue;
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_poll(queue, file, wait);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_poll(&vfh->queue, file, wait);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static int isp_video_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct isp_video_fh *vfh = to_isp_video_fh(file->private_data);
+	struct isp_video *video = video_drvdata(file);
+	int ret;
 
-	return omap3isp_video_queue_mmap(&vfh->queue, vma);
+	mutex_lock(&video->queue_lock);
+	ret = vb2_mmap(&vfh->queue, vma);
+	mutex_unlock(&video->queue_lock);
+
+	return ret;
 }
 
 static struct v4l2_file_operations isp_video_fops = {
@@ -1342,15 +1341,23 @@
 		return -EINVAL;
 	}
 
+	video->alloc_ctx = vb2_dma_contig_init_ctx(video->isp->dev);
+	if (IS_ERR(video->alloc_ctx))
+		return PTR_ERR(video->alloc_ctx);
+
 	ret = media_entity_init(&video->video.entity, 1, &video->pad, 0);
-	if (ret < 0)
+	if (ret < 0) {
+		vb2_dma_contig_cleanup_ctx(video->alloc_ctx);
 		return ret;
+	}
 
 	mutex_init(&video->mutex);
 	atomic_set(&video->active, 0);
 
 	spin_lock_init(&video->pipe.lock);
 	mutex_init(&video->stream_lock);
+	mutex_init(&video->queue_lock);
+	spin_lock_init(&video->irqlock);
 
 	/* Initialize the video device. */
 	if (video->ops == NULL)
@@ -1371,7 +1378,9 @@
 
 void omap3isp_video_cleanup(struct isp_video *video)
 {
+	vb2_dma_contig_cleanup_ctx(video->alloc_ctx);
 	media_entity_cleanup(&video->video.entity);
+	mutex_destroy(&video->queue_lock);
 	mutex_destroy(&video->stream_lock);
 	mutex_destroy(&video->mutex);
 }
diff --git a/drivers/media/platform/omap3isp/ispvideo.h b/drivers/media/platform/omap3isp/ispvideo.h
index 4e19407..7d2e821 100644
--- a/drivers/media/platform/omap3isp/ispvideo.h
+++ b/drivers/media/platform/omap3isp/ispvideo.h
@@ -30,8 +30,7 @@
 #include <media/media-entity.h>
 #include <media/v4l2-dev.h>
 #include <media/v4l2-fh.h>
-
-#include "ispqueue.h"
+#include <media/videobuf2-core.h>
 
 #define ISP_VIDEO_DRIVER_NAME		"ispvideo"
 #define ISP_VIDEO_DRIVER_VERSION	"0.0.2"
@@ -124,17 +123,19 @@
 			       ISP_PIPELINE_IDLE_OUTPUT);
 }
 
-/*
- * struct isp_buffer - ISP buffer
- * @buffer: ISP video buffer
- * @isp_addr: MMU mapped address (a.k.a. device address) of the buffer.
+/**
+ * struct isp_buffer - ISP video buffer
+ * @vb: videobuf2 buffer
+ * @irqlist: List head for insertion into IRQ queue
+ * @dma: DMA address
  */
 struct isp_buffer {
-	struct isp_video_buffer buffer;
-	dma_addr_t isp_addr;
+	struct vb2_buffer vb;
+	struct list_head irqlist;
+	dma_addr_t dma;
 };
 
-#define to_isp_buffer(buf)	container_of(buf, struct isp_buffer, buffer)
+#define to_isp_buffer(buf)	container_of(buf, struct isp_buffer, vb)
 
 enum isp_video_dmaqueue_flags {
 	/* Set if DMA queue becomes empty when ISP_PIPELINE_STREAM_CONTINUOUS */
@@ -172,16 +173,16 @@
 	unsigned int bpl_value;		/* bytes per line value */
 	unsigned int bpl_padding;	/* padding at end of line */
 
-	/* Entity video node streaming */
-	unsigned int streaming:1;
-
 	/* Pipeline state */
 	struct isp_pipeline pipe;
 	struct mutex stream_lock;	/* pipeline and stream states */
 	bool error;
 
 	/* Video buffers queue */
-	struct isp_video_queue *queue;
+	void *alloc_ctx;
+	struct vb2_queue *queue;
+	struct mutex queue_lock;	/* protects the queue */
+	spinlock_t irqlock;		/* protects dmaqueue */
 	struct list_head dmaqueue;
 	enum isp_video_dmaqueue_flags dmaqueue_flags;
 
@@ -193,7 +194,7 @@
 struct isp_video_fh {
 	struct v4l2_fh vfh;
 	struct isp_video *video;
-	struct isp_video_queue queue;
+	struct vb2_queue queue;
 	struct v4l2_format format;
 	struct v4l2_fract timeperframe;
 };
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 349e659..7c4489c 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1200,6 +1200,30 @@
 EXPORT_SYMBOL_GPL(vb2_buffer_done);
 
 /**
+ * vb2_discard_done() - discard all buffers marked as DONE
+ * @q:		videobuf2 queue
+ *
+ * This function is intended to be used with suspend/resume operations. It
+ * discards all 'done' buffers as they would be too old to be requested after
+ * resume.
+ *
+ * Drivers must stop the hardware and synchronize with interrupt handlers and/or
+ * delayed work items before calling this function to make sure no buffer will
+ * be touched by the driver and/or hardware.
+ */
+void vb2_discard_done(struct vb2_queue *q)
+{
+	struct vb2_buffer *vb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->done_lock, flags);
+	list_for_each_entry(vb, &q->done_list, done_entry)
+		vb->state = VB2_BUF_STATE_ERROR;
+	spin_unlock_irqrestore(&q->done_lock, flags);
+}
+EXPORT_SYMBOL_GPL(vb2_discard_done);
+
+/**
  * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a
  * v4l2_buffer by the userspace. The caller has already verified that struct
  * v4l2_buffer has a valid number of planes.
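A hedged usage sketch for the new helper: a capture driver's suspend path might stop its DMA, make sure no completion handler can still run, then discard buffers that already reached the done list. Everything prefixed my_ is hypothetical; vb2_discard_done() and synchronize_irq() are real.

#include <linux/interrupt.h>
#include <media/videobuf2-core.h>

struct my_dev {
	struct vb2_queue queue;
	int irq;
};

static void my_hw_stop(struct my_dev *dev);	/* hypothetical */

static int my_driver_suspend(struct my_dev *dev)
{
	my_hw_stop(dev);		/* stop capture DMA */
	synchronize_irq(dev->irq);	/* no handler still completing buffers */

	/* DONE buffers would be stale by resume time; fail them instead. */
	vb2_discard_done(&dev->queue);
	return 0;
}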
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 02832d6..baca589 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1773,6 +1773,7 @@
 config SCSI_VIRTIO
 	tristate "virtio-scsi support"
 	depends on VIRTIO
+	select BLK_DEV_INTEGRITY
 	help
           This is the virtual HBA driver for virtio.  If the kernel will
           be used in a virtual machine, say Y or M.
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 5858600..31184b3 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -48,6 +48,7 @@
 #include <linux/bitmap.h>
 #include <linux/atomic.h>
 #include <linux/jiffies.h>
+#include <linux/percpu.h>
 #include <asm/div64.h>
 #include "hpsa_cmd.h"
 #include "hpsa.h"
@@ -193,7 +194,8 @@
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
 static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg);
-static void start_io(struct ctlr_info *h);
+static void lock_and_start_io(struct ctlr_info *h);
+static void start_io(struct ctlr_info *h, unsigned long *flags);
 
 #ifdef CONFIG_COMPAT
 static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd, void *arg);
@@ -695,7 +697,7 @@
 static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
 	u32 a;
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags;
 
 	if (h->transMethod & CFGTBL_Trans_io_accel1)
@@ -844,8 +846,8 @@
 	spin_lock_irqsave(&h->lock, flags);
 	addQ(&h->reqQ, c);
 	h->Qdepth++;
+	start_io(h, &flags);
 	spin_unlock_irqrestore(&h->lock, flags);
-	start_io(h);
 }
 
 static inline void removeQ(struct CommandList *c)
@@ -1554,9 +1556,13 @@
 			dev_warn(&h->pdev->dev,
 				"%s: task complete with check condition.\n",
 				"HP SSD Smart Path");
+			cmd->result |= SAM_STAT_CHECK_CONDITION;
 			if (c2->error_data.data_present !=
-					IOACCEL2_SENSE_DATA_PRESENT)
+					IOACCEL2_SENSE_DATA_PRESENT) {
+				memset(cmd->sense_buffer, 0,
+					SCSI_SENSE_BUFFERSIZE);
 				break;
+			}
 			/* copy the sense data */
 			data_len = c2->error_data.sense_data_len;
 			if (data_len > SCSI_SENSE_BUFFERSIZE)
@@ -1566,7 +1572,6 @@
 					sizeof(c2->error_data.sense_data_buff);
 			memcpy(cmd->sense_buffer,
 				c2->error_data.sense_data_buff, data_len);
-			cmd->result |= SAM_STAT_CHECK_CONDITION;
 			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_BUSY:
@@ -1651,16 +1656,6 @@
 	if (is_logical_dev_addr_mode(dev->scsi3addr) &&
 		c2->error_data.serv_response ==
 			IOACCEL2_SERV_RESPONSE_FAILURE) {
-		if (c2->error_data.status ==
-			IOACCEL2_STATUS_SR_IOACCEL_DISABLED)
-			dev_warn(&h->pdev->dev,
-				"%s: Path is unavailable, retrying on standard path.\n",
-				"HP SSD Smart Path");
-		else
-			dev_warn(&h->pdev->dev,
-				"%s: Error 0x%02x, retrying on standard path.\n",
-				"HP SSD Smart Path", c2->error_data.status);
-
 		dev->offload_enabled = 0;
 		h->drv_req_rescan = 1;	/* schedule controller for a rescan */
 		cmd->result = DID_SOFT_ERROR << 16;
@@ -1991,20 +1986,26 @@
 	wait_for_completion(&wait);
 }
 
+static u32 lockup_detected(struct ctlr_info *h)
+{
+	int cpu;
+	u32 rc, *lockup_detected;
+
+	cpu = get_cpu();
+	lockup_detected = per_cpu_ptr(h->lockup_detected, cpu);
+	rc = *lockup_detected;
+	put_cpu();
+	return rc;
+}
+
 static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	unsigned long flags;
-
 	/* If controller lockup detected, fake a hardware error. */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h)))
 		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-	} else {
-		spin_unlock_irqrestore(&h->lock, flags);
+	else
 		hpsa_scsi_do_simple_cmd_core(h, c);
-	}
 }
 
 #define MAX_DRIVER_CMD_RETRIES 25
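The per-cpu conversion here removes h->lock from the submission hot path: each CPU reads its own copy of the flag without locking or shared-cacheline bouncing, and the slow path (in a hunk further below) writes every copy when a lockup is detected. A minimal sketch of the pattern, assuming only the stock percpu API (struct state is illustrative):

#include <linux/errno.h>
#include <linux/percpu.h>

struct state {
	u32 __percpu *lockup_detected;
};

static int state_init(struct state *h)
{
	h->lockup_detected = alloc_percpu(u32);
	return h->lockup_detected ? 0 : -ENOMEM;
}

/* Hot path: read this CPU's copy; get_cpu() pins us while we dereference. */
static u32 state_read(struct state *h)
{
	u32 v;

	v = *per_cpu_ptr(h->lockup_detected, get_cpu());
	put_cpu();
	return v;
}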
@@ -2429,7 +2430,7 @@
 		buflen = 16;
 	buf = kzalloc(64, GFP_KERNEL);
 	if (!buf)
-		return -1;
+		return -ENOMEM;
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | 0x83, buf, 64);
 	if (rc == 0)
 		memcpy(device_id, &buf[8], buflen);
@@ -2515,27 +2516,21 @@
 		return HPSA_VPD_LV_STATUS_UNSUPPORTED;
 
 	/* Does controller have VPD for logical volume status? */
-	if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS)) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD page is unsupported.\n");
+	if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS))
 		goto exit_failed;
-	}
 
 	/* Get the size of the VPD return buffer */
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS,
 					buf, HPSA_VPD_HEADER_SZ);
-	if (rc != 0) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n");
+	if (rc != 0)
 		goto exit_failed;
-	}
 	size = buf[3];
 
 	/* Now get the whole VPD buffer */
 	rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS,
 					buf, size + HPSA_VPD_HEADER_SZ);
-	if (rc != 0) {
-		dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n");
+	if (rc != 0)
 		goto exit_failed;
-	}
 	status = buf[4]; /* status byte */
 
 	kfree(buf);
@@ -2548,11 +2543,11 @@
 /* Determine offline status of a volume.
  * Return either:
  *  0 (not offline)
- * -1 (offline for unknown reasons)
+ *  0xff (offline for unknown reasons)
  *  # (integer code indicating one of several NOT READY states
  *     describing why a volume is to be kept offline)
  */
-static unsigned char hpsa_volume_offline(struct ctlr_info *h,
+static int hpsa_volume_offline(struct ctlr_info *h,
 					unsigned char scsi3addr[])
 {
 	struct CommandList *c;
@@ -2651,11 +2646,15 @@
 
 	if (this_device->devtype == TYPE_DISK &&
 		is_logical_dev_addr_mode(scsi3addr)) {
+		int volume_offline;
+
 		hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
 		if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
 			hpsa_get_ioaccel_status(h, scsi3addr, this_device);
-		this_device->volume_offline =
-			hpsa_volume_offline(h, scsi3addr);
+		volume_offline = hpsa_volume_offline(h, scsi3addr);
+		if (volume_offline < 0 || volume_offline > 0xff)
+			volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
+		this_device->volume_offline = volume_offline & 0xff;
 	} else {
 		this_device->raid_level = RAID_UNKNOWN;
 		this_device->offload_config = 0;
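hpsa_volume_offline() now returns a plain int so negative error codes survive, and the caller above folds anything that cannot be represented in the u8 device field into the 'unsupported' sentinel rather than letting it truncate. A standalone check of that clamping (clamp_status() is illustrative; the constant mirrors HPSA_VPD_LV_STATUS_UNSUPPORTED):

#include <assert.h>

#define LV_STATUS_UNSUPPORTED 0xff

static unsigned char clamp_status(int v)
{
	if (v < 0 || v > 0xff)
		v = LV_STATUS_UNSUPPORTED;
	return (unsigned char)v;
}

int main(void)
{
	assert(clamp_status(-12) == 0xff);	/* error code -> sentinel */
	assert(clamp_status(0x12) == 0x12);	/* real NOT READY code kept */
	assert(clamp_status(0x100) == 0xff);	/* out of range -> sentinel */
	return 0;
}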
@@ -2861,26 +2860,20 @@
 	nphysicals = be32_to_cpu(*((__be32 *)physicals->LUNListLength)) /
 							responsesize;
 
-
 	/* find ioaccel2 handle in list of physicals: */
 	for (i = 0; i < nphysicals; i++) {
+		struct ext_report_lun_entry *entry = &physicals->LUN[i];
+
 		/* handle is in bytes 28-31 of each lun */
-		if (memcmp(&((struct ReportExtendedLUNdata *)
-				physicals)->LUN[i][20], &find, 4) != 0) {
+		if (entry->ioaccel_handle != find)
 			continue; /* didn't match */
-		}
 		found = 1;
-		memcpy(scsi3addr, &((struct ReportExtendedLUNdata *)
-					physicals)->LUN[i][0], 8);
+		memcpy(scsi3addr, entry->lunid, 8);
 		if (h->raid_offload_debug > 0)
 			dev_info(&h->pdev->dev,
-				"%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				"%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%8phN\n",
 				__func__, find,
-				((struct ReportExtendedLUNdata *)
-					physicals)->LUN[i][20],
-				scsi3addr[0], scsi3addr[1], scsi3addr[2],
-				scsi3addr[3], scsi3addr[4], scsi3addr[5],
-				scsi3addr[6], scsi3addr[7]);
+				entry->ioaccel_handle, scsi3addr);
 		break; /* found it */
 	}
 
@@ -2965,7 +2958,8 @@
 		return RAID_CTLR_LUNID;
 
 	if (i < logicals_start)
-		return &physdev_list->LUN[i - (raid_ctlr_position == 0)][0];
+		return &physdev_list->LUN[i -
+				(raid_ctlr_position == 0)].lunid[0];
 
 	if (i < last_device)
 		return &logdev_list->LUN[i - nphysicals -
@@ -3074,7 +3068,7 @@
 		ndev_allocated++;
 	}
 
-	if (unlikely(is_scsi_rev_5(h)))
+	if (is_scsi_rev_5(h))
 		raid_ctlr_position = 0;
 	else
 		raid_ctlr_position = nphysicals + nlogicals;
@@ -3971,7 +3965,6 @@
 	struct hpsa_scsi_dev_t *dev;
 	unsigned char scsi3addr[8];
 	struct CommandList *c;
-	unsigned long flags;
 	int rc = 0;
 
 	/* Get the ptr to our adapter structure out of cmd->host. */
@@ -3984,14 +3977,11 @@
 	}
 	memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
 
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h))) {
 		cmd->result = DID_ERROR << 16;
 		done(cmd);
 		return 0;
 	}
-	spin_unlock_irqrestore(&h->lock, flags);
 	c = cmd_alloc(h);
 	if (c == NULL) {			/* trouble... */
 		dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
@@ -4103,16 +4093,13 @@
 	 * we can prevent new rescan threads from piling up on a
 	 * locked up controller.
 	 */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h))) {
 		spin_lock_irqsave(&h->scan_lock, flags);
 		h->scan_finished = 1;
 		wake_up_all(&h->scan_wait_queue);
 		spin_unlock_irqrestore(&h->scan_lock, flags);
 		return 1;
 	}
-	spin_unlock_irqrestore(&h->lock, flags);
 	return 0;
 }
 
@@ -4963,7 +4950,7 @@
 		buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
 		if (buff == NULL)
 			return -EFAULT;
-		if (iocommand.Request.Type.Direction == XFER_WRITE) {
+		if (iocommand.Request.Type.Direction & XFER_WRITE) {
 			/* Copy the data into the buffer we created */
 			if (copy_from_user(buff, iocommand.buf,
 				iocommand.buf_size)) {
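The Direction fixes in this and the following hunks swap equality tests for bit tests: Direction is a flag field, and a bidirectional passthrough sets both transfer bits, so '==' silently skips the copy in and/or out. A standalone illustration (the XFER_* values are illustrative, not copied from hpsa_cmd.h):

#include <stdio.h>

#define XFER_WRITE 0x01
#define XFER_READ  0x02

int main(void)
{
	unsigned char dir = XFER_READ | XFER_WRITE;	/* bidirectional */

	printf("dir == XFER_WRITE: %d\n", dir == XFER_WRITE);	/* 0, missed */
	printf("dir &  XFER_WRITE: %d\n", !!(dir & XFER_WRITE));/* 1, caught */
	return 0;
}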
@@ -5026,7 +5013,7 @@
 		rc = -EFAULT;
 		goto out;
 	}
-	if (iocommand.Request.Type.Direction == XFER_READ &&
+	if ((iocommand.Request.Type.Direction & XFER_READ) &&
 		iocommand.buf_size > 0) {
 		/* Copy the data out of the buffer we created */
 		if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
@@ -5103,7 +5090,7 @@
 			status = -ENOMEM;
 			goto cleanup1;
 		}
-		if (ioc->Request.Type.Direction == XFER_WRITE) {
+		if (ioc->Request.Type.Direction & XFER_WRITE) {
 			if (copy_from_user(buff[sg_used], data_ptr, sz)) {
 				status = -ENOMEM;
 				goto cleanup1;
@@ -5155,7 +5142,7 @@
 		status = -EFAULT;
 		goto cleanup0;
 	}
-	if (ioc->Request.Type.Direction == XFER_READ && ioc->buf_size > 0) {
+	if ((ioc->Request.Type.Direction & XFER_READ) && ioc->buf_size > 0) {
 		/* Copy the data out of the buffer we created */
 		BYTE __user *ptr = ioc->buf;
 		for (i = 0; i < sg_used; i++) {
@@ -5459,13 +5446,12 @@
 
 /* Takes cmds off the submission queue and sends them to the hardware,
  * then puts them on the queue of cmds waiting for completion.
+ * Assumes h->lock is held
  */
-static void start_io(struct ctlr_info *h)
+static void start_io(struct ctlr_info *h, unsigned long *flags)
 {
 	struct CommandList *c;
-	unsigned long flags;
 
-	spin_lock_irqsave(&h->lock, flags);
 	while (!list_empty(&h->reqQ)) {
 		c = list_entry(h->reqQ.next, struct CommandList, list);
 		/* can't do anything if fifo is full */
@@ -5488,14 +5474,20 @@
 		 * condition.
 		 */
 		h->commands_outstanding++;
-		if (h->commands_outstanding > h->max_outstanding)
-			h->max_outstanding = h->commands_outstanding;
 
 		/* Tell the controller execute command */
-		spin_unlock_irqrestore(&h->lock, flags);
+		spin_unlock_irqrestore(&h->lock, *flags);
 		h->access.submit_command(h, c);
-		spin_lock_irqsave(&h->lock, flags);
+		spin_lock_irqsave(&h->lock, *flags);
 	}
+}
+
+static void lock_and_start_io(struct ctlr_info *h)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+	start_io(h, &flags);
 	spin_unlock_irqrestore(&h->lock, flags);
 }
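The refactoring above inverts the locking contract: start_io() now runs with h->lock already held and borrows the caller's saved flags so it can drop and retake the same lock around the slow doorbell write. A condensed sketch of the idiom; struct ctlr and the have_work/dequeue/hw_submit helpers are hypothetical stand-ins:

#include <linux/spinlock.h>
#include <linux/types.h>

struct cmd;
struct ctlr { spinlock_t lock; };

static bool have_work(struct ctlr *c);			/* hypothetical */
static struct cmd *dequeue(struct ctlr *c);		/* hypothetical */
static void hw_submit(struct ctlr *c, struct cmd *cmd);	/* hypothetical */

/* Caller holds c->lock with IRQs saved in *flags, exactly like start_io(). */
static void submit_all(struct ctlr *c, unsigned long *flags)
{
	while (have_work(c)) {
		struct cmd *cmd = dequeue(c);

		/* Touching the hardware may be slow; do it unlocked. */
		spin_unlock_irqrestore(&c->lock, *flags);
		hw_submit(c, cmd);
		spin_lock_irqsave(&c->lock, *flags);
	}
}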
 
@@ -5563,7 +5555,7 @@
 	else if (c->cmd_type == CMD_IOCTL_PEND)
 		complete(c->waiting);
 	if (unlikely(io_may_be_stalled))
-		start_io(h);
+		lock_and_start_io(h);
 }
 
 static inline u32 hpsa_tag_contains_index(u32 tag)
@@ -5840,12 +5832,12 @@
 		dev_info(&pdev->dev, "using doorbell to reset controller\n");
 		writel(use_doorbell, vaddr + SA5_DOORBELL);
 
-		/* PMC hardware guys tell us we need a 5 second delay after
+		/* PMC hardware guys tell us we need a 10 second delay after
 		 * doorbell reset and before any attempt to talk to the board
 		 * at all to ensure that this actually works and doesn't fall
 		 * over in some weird corner cases.
 		 */
-		msleep(5000);
+		msleep(10000);
 	} else { /* Try to do it the PCI power state way */
 
 		/* Quoting from the Open CISS Specification: "The Power
@@ -6166,6 +6158,8 @@
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
 		dev_info(&h->pdev->dev, "MSIX\n");
 		h->msix_vector = MAX_REPLY_QUEUES;
+		if (h->msix_vector > num_online_cpus())
+			h->msix_vector = num_online_cpus();
 		err = pci_enable_msix(h->pdev, hpsa_msix_entries,
 				      h->msix_vector);
 		if (err > 0) {
@@ -6615,6 +6609,17 @@
 			h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
 }
 
+static void hpsa_irq_affinity_hints(struct ctlr_info *h)
+{
+	int i, cpu, rc;
+
+	cpu = cpumask_first(cpu_online_mask);
+	for (i = 0; i < h->msix_vector; i++) {
+		rc = irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
+		cpu = cpumask_next(cpu, cpu_online_mask);
+	}
+}
+
 static int hpsa_request_irq(struct ctlr_info *h,
 	irqreturn_t (*msixhandler)(int, void *),
 	irqreturn_t (*intxhandler)(int, void *))
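hpsa_irq_affinity_hints() walks the online CPU mask once per vector, which is why the MSI-X vector count is clamped to num_online_cpus() in the hunk above. A sketch of the same round-robin written with an explicit wrap for the general case; spread_irq_hints(), nvec and irqs[] are illustrative, the cpumask/irq calls are the real API:

#include <linux/cpumask.h>
#include <linux/interrupt.h>

static void spread_irq_hints(int nvec, const unsigned int *irqs)
{
	int i, cpu = cpumask_first(cpu_online_mask);

	for (i = 0; i < nvec; i++) {
		/* Hint each vector at one CPU, spreading the IRQ load. */
		irq_set_affinity_hint(irqs[i], get_cpu_mask(cpu));
		cpu = cpumask_next(cpu, cpu_online_mask);
		if (cpu >= nr_cpu_ids)	/* wrap if vectors outnumber CPUs */
			cpu = cpumask_first(cpu_online_mask);
	}
}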
@@ -6634,6 +6639,7 @@
 			rc = request_irq(h->intr[i], msixhandler,
 					0, h->devname,
 					&h->q[i]);
+		hpsa_irq_affinity_hints(h);
 	} else {
 		/* Use single reply pool */
 		if (h->msix_vector > 0 || h->msi_vector) {
@@ -6685,12 +6691,15 @@
 	if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
 		/* Single reply queue, only one irq to free */
 		i = h->intr_mode;
+		irq_set_affinity_hint(h->intr[i], NULL);
 		free_irq(h->intr[i], &h->q[i]);
 		return;
 	}
 
-	for (i = 0; i < h->msix_vector; i++)
+	for (i = 0; i < h->msix_vector; i++) {
+		irq_set_affinity_hint(h->intr[i], NULL);
 		free_irq(h->intr[i], &h->q[i]);
+	}
 }
 
 static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h)
@@ -6707,6 +6716,20 @@
 #endif /* CONFIG_PCI_MSI */
 }
 
+static void hpsa_free_reply_queues(struct ctlr_info *h)
+{
+	int i;
+
+	for (i = 0; i < h->nreply_queues; i++) {
+		if (!h->reply_queue[i].head)
+			continue;
+		pci_free_consistent(h->pdev, h->reply_queue_size,
+			h->reply_queue[i].head, h->reply_queue[i].busaddr);
+		h->reply_queue[i].head = NULL;
+		h->reply_queue[i].busaddr = 0;
+	}
+}
+
 static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 {
 	hpsa_free_irqs_and_disable_msix(h);
@@ -6714,8 +6737,7 @@
 	hpsa_free_cmd_pool(h);
 	kfree(h->ioaccel1_blockFetchTable);
 	kfree(h->blockFetchTable);
-	pci_free_consistent(h->pdev, h->reply_pool_size,
-		h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	if (h->vaddr)
 		iounmap(h->vaddr);
 	if (h->transtable)
@@ -6740,16 +6762,38 @@
 	}
 }
 
+static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value)
+{
+	int i, cpu;
+
+	cpu = cpumask_first(cpu_online_mask);
+	for (i = 0; i < num_online_cpus(); i++) {
+		u32 *lockup_detected;
+		lockup_detected = per_cpu_ptr(h->lockup_detected, cpu);
+		*lockup_detected = value;
+		cpu = cpumask_next(cpu, cpu_online_mask);
+	}
+	wmb(); /* be sure the per-cpu variables are out to memory */
+}
+
 static void controller_lockup_detected(struct ctlr_info *h)
 {
 	unsigned long flags;
+	u32 lockup_detected;
 
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
 	spin_lock_irqsave(&h->lock, flags);
-	h->lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+	lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+	if (!lockup_detected) {
+		/* no heartbeat, but controller gave us a zero. */
+		dev_warn(&h->pdev->dev,
+			"lockup detected but scratchpad register is zero\n");
+		lockup_detected = 0xffffffff;
+	}
+	set_lockup_detected_for_all_cpus(h, lockup_detected);
 	spin_unlock_irqrestore(&h->lock, flags);
 	dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
-			h->lockup_detected);
+			lockup_detected);
 	pci_disable_device(h->pdev);
 	spin_lock_irqsave(&h->lock, flags);
 	fail_all_cmds_on_list(h, &h->cmpQ);
@@ -6884,7 +6928,7 @@
 	struct ctlr_info *h = container_of(to_delayed_work(work),
 					struct ctlr_info, monitor_ctlr_work);
 	detect_controller_lockup(h);
-	if (h->lockup_detected)
+	if (lockup_detected(h))
 		return;
 
 	if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) {
@@ -6934,7 +6978,6 @@
 	 * the 5 lower bits of the address are used by the hardware and by
 	 * the driver.  See comments in hpsa.h for more info.
 	 */
-#define COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT);
 	h = kzalloc(sizeof(*h), GFP_KERNEL);
 	if (!h)
@@ -6949,6 +6992,13 @@
 	spin_lock_init(&h->offline_device_lock);
 	spin_lock_init(&h->scan_lock);
 	spin_lock_init(&h->passthru_count_lock);
+
+	/* Allocate and clear per-cpu variable lockup_detected */
+	h->lockup_detected = alloc_percpu(u32);
+	if (!h->lockup_detected)
+		goto clean1;
+	set_lockup_detected_for_all_cpus(h, 0);
+
 	rc = hpsa_pci_init(h);
 	if (rc != 0)
 		goto clean1;
@@ -7072,6 +7122,8 @@
 	free_irqs(h);
 clean2:
 clean1:
+	if (h->lockup_detected)
+		free_percpu(h->lockup_detected);
 	kfree(h);
 	return rc;
 }
@@ -7080,16 +7132,10 @@
 {
 	char *flush_buf;
 	struct CommandList *c;
-	unsigned long flags;
 
 	/* Don't bother trying to flush the cache if locked up */
-	spin_lock_irqsave(&h->lock, flags);
-	if (unlikely(h->lockup_detected)) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	if (unlikely(lockup_detected(h)))
 		return;
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-
 	flush_buf = kzalloc(4, GFP_KERNEL);
 	if (!flush_buf)
 		return;
@@ -7165,8 +7211,7 @@
 	pci_free_consistent(h->pdev,
 		h->nr_cmds * sizeof(struct ErrorInfo),
 		h->errinfo_pool, h->errinfo_pool_dhandle);
-	pci_free_consistent(h->pdev, h->reply_pool_size,
-		h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	kfree(h->cmd_pool_bits);
 	kfree(h->blockFetchTable);
 	kfree(h->ioaccel1_blockFetchTable);
@@ -7174,6 +7219,7 @@
 	kfree(h->hba_inquiry_data);
 	pci_disable_device(pdev);
 	pci_release_regions(pdev);
+	free_percpu(h->lockup_detected);
 	kfree(h);
 }
 
@@ -7278,8 +7324,16 @@
 	 * 10 = 6 s/g entry or 24k
 	 */
 
+	/* If the controller supports either ioaccel method then
+	 * we can also use the RAID stack submit path that does not
+	 * perform the superfluous readl() after each command submission.
+	 */
+	if (trans_support & (CFGTBL_Trans_io_accel1 | CFGTBL_Trans_io_accel2))
+		access = SA5_performant_access_no_read;
+
 	/* Controller spec: zero out this buffer. */
-	memset(h->reply_pool, 0, h->reply_pool_size);
+	for (i = 0; i < h->nreply_queues; i++)
+		memset(h->reply_queue[i].head, 0, h->reply_queue_size);
 
 	bft[7] = SG_ENTRIES_IN_CMD + 4;
 	calc_bucket_map(bft, ARRAY_SIZE(bft),
@@ -7295,8 +7349,7 @@
 
 	for (i = 0; i < h->nreply_queues; i++) {
 		writel(0, &h->transtable->RepQAddr[i].upper);
-		writel(h->reply_pool_dhandle +
-			(h->max_commands * sizeof(u64) * i),
+		writel(h->reply_queue[i].busaddr,
 			&h->transtable->RepQAddr[i].lower);
 	}
 
@@ -7344,8 +7397,10 @@
 				h->ioaccel1_blockFetchTable);
 
 		/* initialize all reply queue entries to unused */
-		memset(h->reply_pool, (u8) IOACCEL_MODE1_REPLY_UNUSED,
-				h->reply_pool_size);
+		for (i = 0; i < h->nreply_queues; i++)
+			memset(h->reply_queue[i].head,
+				(u8) IOACCEL_MODE1_REPLY_UNUSED,
+				h->reply_queue_size);
 
 		/* set all the constant fields in the accelerator command
 		 * frames once at init time to save CPU cycles later.
@@ -7407,7 +7462,6 @@
 	 * because the 7 lower bits of the address are used by the
 	 * hardware.
 	 */
-#define IOACCEL1_COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct io_accel1_cmd) %
 			IOACCEL1_COMMANDLIST_ALIGNMENT);
 	h->ioaccel_cmd_pool =
@@ -7445,7 +7499,6 @@
 	if (h->ioaccel_maxsg > IOACCEL2_MAXSGENTRIES)
 		h->ioaccel_maxsg = IOACCEL2_MAXSGENTRIES;
 
-#define IOACCEL2_COMMANDLIST_ALIGNMENT 128
 	BUILD_BUG_ON(sizeof(struct io_accel2_cmd) %
 			IOACCEL2_COMMANDLIST_ALIGNMENT);
 	h->ioaccel2_cmd_pool =
@@ -7503,16 +7556,17 @@
 		}
 	}
 
-	/* TODO, check that this next line h->nreply_queues is correct */
 	h->nreply_queues = h->msix_vector > 0 ? h->msix_vector : 1;
 	hpsa_get_max_perf_mode_cmds(h);
 	/* Performant mode ring buffer and supporting data structures */
-	h->reply_pool_size = h->max_commands * sizeof(u64) * h->nreply_queues;
-	h->reply_pool = pci_alloc_consistent(h->pdev, h->reply_pool_size,
-				&(h->reply_pool_dhandle));
+	h->reply_queue_size = h->max_commands * sizeof(u64);
 
 	for (i = 0; i < h->nreply_queues; i++) {
-		h->reply_queue[i].head = &h->reply_pool[h->max_commands * i];
+		h->reply_queue[i].head = pci_alloc_consistent(h->pdev,
+						h->reply_queue_size,
+						&(h->reply_queue[i].busaddr));
+		if (!h->reply_queue[i].head)
+			goto clean_up;
 		h->reply_queue[i].size = h->max_commands;
 		h->reply_queue[i].wraparound = 1;  /* spec: init to 1 */
 		h->reply_queue[i].current_entry = 0;
@@ -7521,18 +7575,14 @@
 	/* Need a block fetch table for performant mode */
 	h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) *
 				sizeof(u32)), GFP_KERNEL);
-
-	if ((h->reply_pool == NULL)
-		|| (h->blockFetchTable == NULL))
+	if (!h->blockFetchTable)
 		goto clean_up;
 
 	hpsa_enter_performant_mode(h, trans_support);
 	return;
 
 clean_up:
-	if (h->reply_pool)
-		pci_free_consistent(h->pdev, h->reply_pool_size,
-			h->reply_pool, h->reply_pool_dhandle);
+	hpsa_free_reply_queues(h);
 	kfree(h->blockFetchTable);
 }
 
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 1e3cf33..24472ce 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -57,11 +57,12 @@
 
 };
 
-struct reply_pool {
+struct reply_queue_buffer {
 	u64 *head;
 	size_t size;
 	u8 wraparound;
 	u32 current_entry;
+	dma_addr_t busaddr;
 };
 
 #pragma pack(1)
@@ -116,11 +117,8 @@
 	int 	nr_cmds; /* Number of commands allowed on this controller */
 	struct CfgTable __iomem *cfgtable;
 	int	interrupts_enabled;
-	int	major;
 	int 	max_commands;
 	int	commands_outstanding;
-	int 	max_outstanding; /* Debug */
-	int	usage_count;  /* number of opens all all minor devices */
 #	define PERF_MODE_INT	0
 #	define DOORBELL_INT	1
 #	define SIMPLE_MODE_INT	2
@@ -177,11 +175,9 @@
 	/*
 	 * Performant mode completion buffers
 	 */
-	u64 *reply_pool;
-	size_t reply_pool_size;
-	struct reply_pool reply_queue[MAX_REPLY_QUEUES];
+	size_t reply_queue_size;
+	struct reply_queue_buffer reply_queue[MAX_REPLY_QUEUES];
 	u8 nreply_queues;
-	dma_addr_t reply_pool_dhandle;
 	u32 *blockFetchTable;
 	u32 *ioaccel1_blockFetchTable;
 	u32 *ioaccel2_blockFetchTable;
@@ -196,7 +192,7 @@
 	u64 last_heartbeat_timestamp;
 	u32 heartbeat_sample_interval;
 	atomic_t firmware_flash_in_progress;
-	u32 lockup_detected;
+	u32 *lockup_detected;
 	struct delayed_work monitor_ctlr_work;
 	int remove_in_progress;
 	u32 fifo_recently_full;
@@ -233,11 +229,9 @@
 #define CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE	(1 << 31)
 
 #define RESCAN_REQUIRED_EVENT_BITS \
-		(CTLR_STATE_CHANGE_EVENT | \
-		CTLR_ENCLOSURE_HOT_PLUG_EVENT | \
+		(CTLR_ENCLOSURE_HOT_PLUG_EVENT | \
 		CTLR_STATE_CHANGE_EVENT_PHYSICAL_DRV | \
 		CTLR_STATE_CHANGE_EVENT_LOGICAL_DRV | \
-		CTLR_STATE_CHANGE_EVENT_REDUNDANT_CNTRL | \
 		CTLR_STATE_CHANGE_EVENT_AIO_ENABLED_DISABLED | \
 		CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE)
 	spinlock_t offline_device_lock;
@@ -346,22 +340,23 @@
 static void SA5_submit_command(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr,
-		c->Header.Tag.lower);
 	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	(void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
 }
 
+static void SA5_submit_command_no_read(struct ctlr_info *h,
+	struct CommandList *c)
+{
+	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+}
+
 static void SA5_submit_command_ioaccel2(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr,
-		c->Header.Tag.lower);
 	if (c->cmd_type == CMD_IOACCEL2)
 		writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
 	else
 		writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
-	(void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
 }
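The difference between the two submit flavours is the trailing read: a PCI MMIO write may be posted, and reading any register on the same device forces it out. Per the transport hunk in hpsa.c above, the no_read variant is only wired up when the controller runs in ioaccel mode, where that flush is described as superfluous. A hedged sketch of the contrast; the register offsets are illustrative, writel()/readl() are the real accessors:

#include <linux/io.h>

#define REQUEST_PORT	0x40	/* illustrative offsets */
#define SCRATCHPAD	0xB0

static void submit_flushed(void __iomem *base, u32 tag)
{
	writel(tag, base + REQUEST_PORT);
	(void)readl(base + SCRATCHPAD);	/* force the posted write out */
}

static void submit_posted(void __iomem *base, u32 tag)
{
	writel(tag, base + REQUEST_PORT);	/* cheaper: write may post */
}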
 
 /*
@@ -399,7 +394,7 @@
 
 static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 {
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags, register_value = FIFO_EMPTY;
 
 	/* msi auto clears the interrupt pending bit. */
@@ -478,7 +473,6 @@
 {
 	unsigned long register_value  =
 		readl(h->vaddr + SA5_INTR_STATUS);
-	dev_dbg(&h->pdev->dev, "intr_pending %lx\n", register_value);
 	return register_value & SA5_INTR_PENDING;
 }
 
@@ -515,7 +509,7 @@
 static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q)
 {
 	u64 register_value;
-	struct reply_pool *rq = &h->reply_queue[q];
+	struct reply_queue_buffer *rq = &h->reply_queue[q];
 	unsigned long flags;
 
 	BUG_ON(q >= h->nreply_queues);
@@ -573,6 +567,14 @@
 	SA5_performant_completed,
 };
 
+static struct access_method SA5_performant_access_no_read = {
+	SA5_submit_command_no_read,
+	SA5_performant_intr_mask,
+	SA5_fifo_full,
+	SA5_performant_intr_pending,
+	SA5_performant_completed,
+};
+
 struct board_type {
 	u32	board_id;
 	char	*product_name;
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index b5cc705..b5125dc 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -151,7 +151,7 @@
 #define HPSA_VPD_HEADER_SZ              4
 
 /* Logical volume states */
-#define HPSA_VPD_LV_STATUS_UNSUPPORTED			-1
+#define HPSA_VPD_LV_STATUS_UNSUPPORTED			0xff
 #define HPSA_LV_OK                                      0x0
 #define HPSA_LV_UNDERGOING_ERASE			0x0F
 #define HPSA_LV_UNDERGOING_RPI				0x12
@@ -238,11 +238,21 @@
 	u8 LUN[HPSA_MAX_LUN][8];
 };
 
+struct ext_report_lun_entry {
+	u8 lunid[8];
+	u8 wwid[8];
+	u8 device_type;
+	u8 device_flags;
+	u8 lun_count; /* multi-lun device, how many luns */
+	u8 redundant_paths;
+	u32 ioaccel_handle; /* ioaccel1 only uses lower 16 bits */
+};
+
 struct ReportExtendedLUNdata {
 	u8 LUNListLength[4];
 	u8 extended_response_flag;
 	u8 reserved[3];
-	u8 LUN[HPSA_MAX_LUN][24];
+	struct ext_report_lun_entry LUN[HPSA_MAX_LUN];
 };
 
 struct SenseSubsystem_info {
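Replacing the raw u8 LUN[][24] rows with a typed entry is what lets the lookup hunk in hpsa.c above compare entry->ioaccel_handle directly instead of memcmp()ing a magic byte offset. A standalone illustration of the packed-overlay idea; the layout mirrors the new struct, the raw bytes are made up:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#pragma pack(1)
struct lun_entry {
	uint8_t  lunid[8];
	uint8_t  wwid[8];
	uint8_t  device_type;
	uint8_t  device_flags;
	uint8_t  lun_count;
	uint8_t  redundant_paths;
	uint32_t ioaccel_handle;	/* bytes 20-23 of the 24-byte entry */
};
#pragma pack()

int main(void)
{
	uint8_t raw[24] = { 0 };
	struct lun_entry e;

	assert(sizeof(struct lun_entry) == 24);
	memcpy(&raw[20], "\x11\x22\x33\x44", 4);
	memcpy(&e, raw, sizeof(e));
	/* The field aliases the same bytes the old code indexed by hand. */
	assert(memcmp(&e.ioaccel_handle, &raw[20], 4) == 0);
	return 0;
}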
@@ -375,6 +385,7 @@
  *        or a bus address.
  */
 
+#define COMMANDLIST_ALIGNMENT 128
 struct CommandList {
 	struct CommandListHeader Header;
 	struct RequestBlock      Request;
@@ -389,21 +400,7 @@
 	struct list_head list;
 	struct completion *waiting;
 	void   *scsi_cmd;
-
-/* on 64 bit architectures, to get this to be 32-byte-aligned
- * it so happens we need PAD_64 bytes of padding, on 32 bit systems,
- * we need PAD_32 bytes of padding (see below).   This does that.
- * If it happens that 64 bit and 32 bit systems need different
- * padding, PAD_32 and PAD_64 can be set independently, and.
- * the code below will do the right thing.
- */
-#define IS_32_BIT ((8 - sizeof(long))/4)
-#define IS_64_BIT (!IS_32_BIT)
-#define PAD_32 (40)
-#define PAD_64 (12)
-#define COMMANDLIST_PAD (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
-	u8 pad[COMMANDLIST_PAD];
-};
+} __aligned(COMMANDLIST_ALIGNMENT);
 
 /* Max S/G elements in I/O accelerator command */
 #define IOACCEL1_MAXSGENTRIES           24
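The hand-maintained pad arrays deleted in the CommandList hunk above (and in the two ioaccel structs below) existed only to round each command structure up to a 128-byte multiple for the hardware; __aligned() lets the compiler do that. A standalone check that the attribute both rounds sizeof and keeps pool elements aligned; struct cmd and its fields are illustrative:

#include <assert.h>
#include <stdint.h>

#define CMD_ALIGNMENT 128	/* mirrors COMMANDLIST_ALIGNMENT */

struct cmd {
	unsigned long header[4];
	void *cookie;
} __attribute__((aligned(CMD_ALIGNMENT)));	/* the kernel spells this __aligned() */

int main(void)
{
	struct cmd pool[4];

	/* sizeof is rounded up to a multiple of the alignment... */
	assert(sizeof(struct cmd) % CMD_ALIGNMENT == 0);
	/* ...so every element of a contiguous pool stays aligned. */
	assert((uintptr_t)&pool[1] % CMD_ALIGNMENT == 0);
	return 0;
}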
@@ -413,6 +410,7 @@
  * Structure for I/O accelerator (mode 1) commands.
  * Note that this structure must be 128-byte aligned in size.
  */
+#define IOACCEL1_COMMANDLIST_ALIGNMENT 128
 struct io_accel1_cmd {
 	u16 dev_handle;			/* 0x00 - 0x01 */
 	u8  reserved1;			/* 0x02 */
@@ -440,12 +438,7 @@
 	struct vals32 host_addr;	/* 0x70 - 0x77 */
 	u8  CISS_LUN[8];		/* 0x78 - 0x7F */
 	struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES];
-#define IOACCEL1_PAD_64 0
-#define IOACCEL1_PAD_32 0
-#define IOACCEL1_PAD (IS_32_BIT * IOACCEL1_PAD_32 + \
-			IS_64_BIT * IOACCEL1_PAD_64)
-	u8 pad[IOACCEL1_PAD];
-};
+} __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT);
 
 #define IOACCEL1_FUNCTION_SCSIIO        0x00
 #define IOACCEL1_SGLOFFSET              32
@@ -510,14 +503,11 @@
 	u8 sense_data_buff[32];		/* sense/response data buffer */
 };
 
-#define IOACCEL2_64_PAD 76
-#define IOACCEL2_32_PAD 76
-#define IOACCEL2_PAD (IS_32_BIT * IOACCEL2_32_PAD + \
-			IS_64_BIT * IOACCEL2_64_PAD)
 /*
  * Structure for I/O accelerator (mode 2 or m2) commands.
  * Note that this structure must be 128-byte aligned in size.
  */
+#define IOACCEL2_COMMANDLIST_ALIGNMENT 128
 struct io_accel2_cmd {
 	u8  IU_type;			/* IU Type */
 	u8  direction;			/* direction, memtype, and encryption */
@@ -544,8 +534,7 @@
 	u32 tweak_upper;		/* Encryption tweak, upper 4 bytes */
 	struct ioaccel2_sg_element sg[IOACCEL2_MAXSGENTRIES];
 	struct io_accel2_scsi_response error_data;
-	u8 pad[IOACCEL2_PAD];
-};
+} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT);
 
 /*
  * defines for Mode 2 command struct
@@ -636,7 +625,7 @@
 	u32            RepQCount;
 	u32            RepQCtrAddrLow32;
 	u32            RepQCtrAddrHigh32;
-#define MAX_REPLY_QUEUES 8
+#define MAX_REPLY_QUEUES 64
 	struct vals32  RepQAddr[MAX_REPLY_QUEUES];
 };
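
The hpsa changes above replace the hand-maintained PAD_32/PAD_64 arithmetic
with a compiler-enforced attribute: __aligned(128) rounds sizeof() up to a
multiple of the alignment and places every instance on a 128-byte boundary,
so adding a field can no longer silently break the controller's requirement.
A stand-alone sketch of the idea (hypothetical struct, not the driver's real
layout; compiles in user space with GCC/Clang):

    #include <stdio.h>

    #define CMD_ALIGNMENT 128      /* controller-imposed, as in the patch */

    struct cmd {
            long header[5];
            char cdb[16];
    } __attribute__((aligned(CMD_ALIGNMENT)));  /* kernel's __aligned() */

    /* holds on both 32- and 64-bit ABIs, unlike the old PAD_32/PAD_64 math */
    _Static_assert(sizeof(struct cmd) % CMD_ALIGNMENT == 0,
                   "command blocks must tile on 128-byte boundaries");

    int main(void)
    {
            struct cmd ring[4];    /* each element starts 128-byte aligned */

            printf("sizeof(struct cmd) = %zu\n", sizeof(struct cmd));
            (void)ring;
            return 0;
    }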
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index ecd7bd3..3d1bc67 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -338,7 +338,7 @@
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 	struct iscsi_scsi_req *hdr;
-	unsigned hdrlength, cmd_len;
+	unsigned hdrlength, cmd_len, transfer_length;
 	itt_t itt;
 	int rc;
 
@@ -391,11 +391,11 @@
 	if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
 		task->protected = true;
 
+	transfer_length = scsi_transfer_length(sc);
+	hdr->data_length = cpu_to_be32(transfer_length);
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
-		unsigned out_len = scsi_out(sc)->length;
 		struct iscsi_r2t_info *r2t = &task->unsol_r2t;
 
-		hdr->data_length = cpu_to_be32(out_len);
 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
 		/*
 		 * Write counters:
@@ -414,18 +414,19 @@
 		memset(r2t, 0, sizeof(*r2t));
 
 		if (session->imm_data_en) {
-			if (out_len >= session->first_burst)
+			if (transfer_length >= session->first_burst)
 				task->imm_count = min(session->first_burst,
 							conn->max_xmit_dlength);
 			else
-				task->imm_count = min(out_len,
-							conn->max_xmit_dlength);
+				task->imm_count = min(transfer_length,
+						      conn->max_xmit_dlength);
 			hton24(hdr->dlength, task->imm_count);
 		} else
 			zero_data(hdr->dlength);
 
 		if (!session->initial_r2t_en) {
-			r2t->data_length = min(session->first_burst, out_len) -
+			r2t->data_length = min(session->first_burst,
+					       transfer_length) -
 					       task->imm_count;
 			r2t->data_offset = task->imm_count;
 			r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
@@ -438,7 +439,6 @@
 	} else {
 		hdr->flags |= ISCSI_FLAG_CMD_FINAL;
 		zero_data(hdr->dlength);
-		hdr->data_length = cpu_to_be32(scsi_in(sc)->length);
 
 		if (sc->sc_data_direction == DMA_FROM_DEVICE)
 			hdr->flags |= ISCSI_FLAG_CMD_READ;
@@ -466,7 +466,7 @@
 			  scsi_bidi_cmnd(sc) ? "bidirectional" :
 			  sc->sc_data_direction == DMA_TO_DEVICE ?
 			  "write" : "read", conn->id, sc, sc->cmnd[0],
-			  task->itt, scsi_bufflen(sc),
+			  task->itt, transfer_length,
 			  scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0,
 			  session->cmdsn,
 			  session->max_cmdsn - session->exp_cmdsn + 1);
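
The libiscsi change above routes every data-length computation through one
helper instead of reading scsi_out()/scsi_in() lengths per direction. The
point of a scsi_transfer_length()-style helper is that the wire-visible
length can exceed the data buffer when T10 protection information travels
with the data; computing it once keeps hdr->data_length, imm_count and the
R2T bookkeeping consistent. Conceptual sketch only -- the names below are
illustrative, not the kernel's exact implementation:

    #include <stdbool.h>
    #include <stdint.h>

    struct xfer {
            uint32_t buf_len;      /* scsi_bufflen(): data bytes only */
            uint32_t block_size;   /* logical block size */
            bool     pi_on_wire;   /* protection info sent with the data */
    };

    static uint32_t transfer_length(const struct xfer *x)
    {
            uint32_t len = x->buf_len;

            if (x->pi_on_wire)     /* one 8-byte PI tuple per logical block */
                    len += (x->buf_len / x->block_size) * 8;
            return len;
    }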
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 94a3caf..434e903 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -640,6 +640,7 @@
 #define HBA_DEVLOSS_TMO         0x2000 /* HBA in devloss timeout */
 #define HBA_RRQ_ACTIVE		0x4000 /* process the rrq active list */
 #define HBA_FCP_IOQ_FLUSH	0x8000 /* FCP I/O queues being flushed */
+#define HBA_FW_DUMP_OP		0x10000 /* Skips fn reset before FW dump */
 	uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
 	struct lpfc_dmabuf slim2p;
 
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 8d5b6ce..1d7a5c3 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -919,10 +919,15 @@
 		phba->cfg_sriov_nr_virtfn = 0;
 	}
 
+	if (opcode == LPFC_FW_DUMP)
+		phba->hba_flag |= HBA_FW_DUMP_OP;
+
 	status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
 
-	if (status != 0)
+	if (status != 0) {
+		phba->hba_flag &= ~HBA_FW_DUMP_OP;
 		return status;
+	}
 
 	/* wait for the device to be quiesced before firmware reset */
 	msleep(100);
@@ -2364,7 +2369,7 @@
 	uint8_t wwpn[WWN_SZ];
 	int rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	/* count may include a LF at end of string */
@@ -2432,7 +2437,7 @@
 	uint8_t wwpn[WWN_SZ];
 	int rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	/* count may include a LF at end of string */
@@ -2499,7 +2504,7 @@
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
 	int val = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (!isdigit(buf[0]))
@@ -2565,7 +2570,7 @@
 
 	int rc = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (oas_state) {
@@ -2670,7 +2675,7 @@
 	uint64_t oas_lun;
 	int len = 0;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0)
@@ -2716,7 +2721,7 @@
 	uint64_t scsi_lun;
 	ssize_t rc;
 
-	if (!phba->cfg_EnableXLane)
+	if (!phba->cfg_fof)
 		return -EPERM;
 
 	if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0)
@@ -4655,7 +4660,7 @@
 #       0x0 - 0x7f  = CS_CTL field in FC header (high 7 bits)
 # Value range is [0x0,0x7f]. Default value is 0
 */
-LPFC_ATTR_R(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
+LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
 
 /*
 # lpfc_enable_bg: Enable BlockGuard (Emulex's Implementation of T10-DIF)
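
The firmware-dump plumbing above is a two-sided flag protocol: the sysfs
handler raises HBA_FW_DUMP_OP before taking the port offline and lowers it
again if the offline request fails, while the SLI-4 board-reset path (see
the lpfc_sli.c hunk later in this series) consumes the flag to skip the PCI
function reset so the dump survives. Condensed sketch, producer then
consumer (names from the patch, error handling trimmed):

    /* producer: sysfs-triggered firmware dump */
    if (opcode == LPFC_FW_DUMP)
            phba->hba_flag |= HBA_FW_DUMP_OP;

    status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
    if (status != 0) {
            phba->hba_flag &= ~HBA_FW_DUMP_OP;  /* nobody left to consume it */
            return status;
    }

    /* consumer: lpfc_sli4_brdreset() */
    if (phba->hba_flag & HBA_FW_DUMP_OP) {
            phba->hba_flag &= ~HBA_FW_DUMP_OP;  /* one-shot */
            return rc;      /* skip the PCI function reset during the dump */
    }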
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index ca2f4ea..5b5c825 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h
index a94d4c9..928ef60 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.h
+++ b/drivers/scsi/lpfc/lpfc_bsg.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2010-2012 Emulex.  All rights reserved.                *
+ * Copyright (C) 2010-2014 Emulex.  All rights reserved.                *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index adda0bf..db5604f 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -289,6 +289,7 @@
 void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_bemem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
+void lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba);
 void lpfc_sli_hba_iocb_abort(struct lpfc_hba *);
 void lpfc_sli_flush_fcp_rings(struct lpfc_hba *);
 int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
@@ -310,6 +311,9 @@
 int lpfc_sli_sum_iocb(struct lpfc_vport *, uint16_t, uint64_t, lpfc_ctx_cmd);
 int lpfc_sli_abort_iocb(struct lpfc_vport *, struct lpfc_sli_ring *, uint16_t,
 			uint64_t, lpfc_ctx_cmd);
+int
+lpfc_sli_abort_taskmgmt(struct lpfc_vport *, struct lpfc_sli_ring *,
+			uint16_t, uint64_t, lpfc_ctx_cmd);
 
 void lpfc_mbox_timeout(unsigned long);
 void lpfc_mbox_timeout_handler(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 828c08e..b0aedce 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2007-2012 Emulex.  All rights reserved.           *
+ * Copyright (C) 2007-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -2314,7 +2314,7 @@
 			goto too_big;
 	}
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/* OAS CQ */
 		qp = phba->sli4_hba.oas_cq;
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 624fe0b..7a5d81a 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 294c072..2a17e31 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -5634,6 +5634,9 @@
 		ndlp->active_rrqs_xri_bitmap =
 				mempool_alloc(vport->phba->active_rrq_pool,
 					      GFP_KERNEL);
+		if (ndlp->active_rrqs_xri_bitmap)
+			memset(ndlp->active_rrqs_xri_bitmap, 0,
+			       ndlp->phba->cfg_rrq_xri_bitmap_sz);
 	}
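
The memset added above matters because mempool_alloc(), unlike kzalloc(),
has no zeroing variant: it returns either a fresh element from the backing
allocator or a recycled one, and neither is guaranteed to be clear. A
recycled RRQ bitmap with stale bits would report XRIs as still active.
Pattern in miniature (field names from the patch):

    ndlp->active_rrqs_xri_bitmap =
            mempool_alloc(vport->phba->active_rrq_pool, GFP_KERNEL);
    if (ndlp->active_rrqs_xri_bitmap)
            memset(ndlp->active_rrqs_xri_bitmap, 0,
                   ndlp->phba->cfg_rrq_xri_bitmap_sz);  /* clear stale bits */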
 
 
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 3d9438c..2362592 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index fd79f7d..f432ec1 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.                *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.                *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 635eeb3..06f9a5b 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -820,7 +820,139 @@
 }
 
 /**
+ * lpfc_sli4_free_sp_events - Cleanup sp_queue_events to free
+ * deferred rspiocbs
+ *
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine will clean up completed slow path events after the HBA is reset
+ * when bringing down the SLI Layer.
+ *
+ *
+ * Return codes
+ *   void.
+ **/
+static void
+lpfc_sli4_free_sp_events(struct lpfc_hba *phba)
+{
+	struct lpfc_iocbq *rspiocbq;
+	struct hbq_dmabuf *dmabuf;
+	struct lpfc_cq_event *cq_event;
+
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~HBA_SP_QUEUE_EVT;
+	spin_unlock_irq(&phba->hbalock);
+
+	while (!list_empty(&phba->sli4_hba.sp_queue_event)) {
+		/* Get the response iocb from the head of work queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_queue_event,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+
+		switch (bf_get(lpfc_wcqe_c_code, &cq_event->cqe.wcqe_cmpl)) {
+		case CQE_CODE_COMPL_WQE:
+			rspiocbq = container_of(cq_event, struct lpfc_iocbq,
+						 cq_event);
+			lpfc_sli_release_iocbq(phba, rspiocbq);
+			break;
+		case CQE_CODE_RECEIVE:
+		case CQE_CODE_RECEIVE_V1:
+			dmabuf = container_of(cq_event, struct hbq_dmabuf,
+					      cq_event);
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+		}
+	}
+}
+
+/**
+ * lpfc_hba_free_post_buf - Perform lpfc uninitialization after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine will clean up posted ELS buffers after the HBA is reset
+ * when bringing down the SLI Layer.
+ *
+ *
+ * Return codes
+ *   void.
+ **/
+static void
+lpfc_hba_free_post_buf(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring *pring;
+	struct lpfc_dmabuf *mp, *next_mp;
+	LIST_HEAD(buflist);
+	int count;
+
+	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
+		lpfc_sli_hbqbuf_free_all(phba);
+	else {
+		/* Cleanup preposted buffers on the ELS ring */
+		pring = &psli->ring[LPFC_ELS_RING];
+		spin_lock_irq(&phba->hbalock);
+		list_splice_init(&pring->postbufq, &buflist);
+		spin_unlock_irq(&phba->hbalock);
+
+		count = 0;
+		list_for_each_entry_safe(mp, next_mp, &buflist, list) {
+			list_del(&mp->list);
+			count++;
+			lpfc_mbuf_free(phba, mp->virt, mp->phys);
+			kfree(mp);
+		}
+
+		spin_lock_irq(&phba->hbalock);
+		pring->postbufq_cnt -= count;
+		spin_unlock_irq(&phba->hbalock);
+	}
+}
+
+/**
+ * lpfc_hba_clean_txcmplq - Perform lpfc uninitialization after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine will clean up the txcmplq after the HBA is reset when bringing
+ * down the SLI Layer.
+ *
+ * Return codes
+ *   void
+ **/
+static void
+lpfc_hba_clean_txcmplq(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring *pring;
+	LIST_HEAD(completions);
+	int i;
+
+	for (i = 0; i < psli->num_rings; i++) {
+		pring = &psli->ring[i];
+		if (phba->sli_rev >= LPFC_SLI_REV4)
+			spin_lock_irq(&pring->ring_lock);
+		else
+			spin_lock_irq(&phba->hbalock);
+		/* At this point in time the HBA is either reset or DOA. Either
+		 * way, nothing should be on txcmplq as it will NEVER complete.
+		 */
+		list_splice_init(&pring->txcmplq, &completions);
+		pring->txcmplq_cnt = 0;
+
+		if (phba->sli_rev >= LPFC_SLI_REV4)
+			spin_unlock_irq(&pring->ring_lock);
+		else
+			spin_unlock_irq(&phba->hbalock);
+
+		/* Cancel all the IOCBs from the completions list */
+		lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_ABORTED);
+		lpfc_sli_abort_iocb_ring(phba, pring);
+	}
+}
+
+/**
  * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset
  * @phba: pointer to lpfc HBA data structure.
  *
  * This routine will do uninitialization after the HBA is reset when bring
@@ -833,44 +965,8 @@
 static int
 lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_dmabuf *mp, *next_mp;
-	LIST_HEAD(completions);
-	int i;
-
-	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
-		lpfc_sli_hbqbuf_free_all(phba);
-	else {
-		/* Cleanup preposted buffers on the ELS ring */
-		pring = &psli->ring[LPFC_ELS_RING];
-		list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
-			list_del(&mp->list);
-			pring->postbufq_cnt--;
-			lpfc_mbuf_free(phba, mp->virt, mp->phys);
-			kfree(mp);
-		}
-	}
-
-	spin_lock_irq(&phba->hbalock);
-	for (i = 0; i < psli->num_rings; i++) {
-		pring = &psli->ring[i];
-
-		/* At this point in time the HBA is either reset or DOA. Either
-		 * way, nothing should be on txcmplq as it will NEVER complete.
-		 */
-		list_splice_init(&pring->txcmplq, &completions);
-		spin_unlock_irq(&phba->hbalock);
-
-		/* Cancel all the IOCBs from the completions list */
-		lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
-				      IOERR_SLI_ABORTED);
-
-		lpfc_sli_abort_iocb_ring(phba, pring);
-		spin_lock_irq(&phba->hbalock);
-	}
-	spin_unlock_irq(&phba->hbalock);
-
+	lpfc_hba_free_post_buf(phba);
+	lpfc_hba_clean_txcmplq(phba);
 	return 0;
 }
 
@@ -890,13 +986,12 @@
 {
 	struct lpfc_scsi_buf *psb, *psb_next;
 	LIST_HEAD(aborts);
-	int ret;
 	unsigned long iflag = 0;
 	struct lpfc_sglq *sglq_entry = NULL;
 
-	ret = lpfc_hba_down_post_s3(phba);
-	if (ret)
-		return ret;
+	lpfc_hba_free_post_buf(phba);
+	lpfc_hba_clean_txcmplq(phba);
+
 	/* At this point in time the HBA is either reset or DOA. Either
 	 * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be
 	 * on the lpfc_sgl_list so that it can either be freed if the
@@ -932,6 +1027,8 @@
 	spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
 	list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
 	spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+
+	lpfc_sli4_free_sp_events(phba);
 	return 0;
 }
 
@@ -1250,7 +1347,6 @@
 lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
 {
 	uint32_t old_host_status = phba->work_hs;
-	struct lpfc_sli_ring  *pring;
 	struct lpfc_sli *psli = &phba->sli;
 
 	/* If the pci channel is offline, ignore possible errors,
@@ -1279,8 +1375,7 @@
 	 * dropped by the firmware. Error iocb (I/O) on txcmplq and let the
 	 * SCSI layer retry it after re-establishing link.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 
 	/*
 	 * There was a firmware error. Take the hba offline and then
@@ -1348,7 +1443,6 @@
 {
 	struct lpfc_vport *vport = phba->pport;
 	struct lpfc_sli   *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
 	uint32_t event_data;
 	unsigned long temperature;
 	struct temp_event temp_event_data;
@@ -1400,8 +1494,7 @@
 		* Error iocb (I/O) on txcmplq and let the SCSI layer
 		* retry it after re-establishing link.
 		*/
-		pring = &psli->ring[psli->fcp_ring];
-		lpfc_sli_abort_iocb_ring(phba, pring);
+		lpfc_sli_abort_fcp_rings(phba);
 
 		/*
 		 * There was a firmware error.  Take the hba offline and then
@@ -1940,78 +2033,81 @@
 
 	switch (dev_id) {
 	case PCI_DEVICE_ID_FIREFLY:
-		m = (typeof(m)){"LP6000", "PCI", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP6000", "PCI",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_SUPERFLY:
 		if (vp->rev.biuRev >= 1 && vp->rev.biuRev <= 3)
-			m = (typeof(m)){"LP7000", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP7000", "PCI", ""};
 		else
-			m = (typeof(m)){"LP7000E", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP7000E", "PCI", ""};
+		m.function = "Obsolete, Unsupported Fibre Channel Adapter";
 		break;
 	case PCI_DEVICE_ID_DRAGONFLY:
 		m = (typeof(m)){"LP8000", "PCI",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_CENTAUR:
 		if (FC_JEDEC_ID(vp->rev.biuRev) == CENTAUR_2G_JEDEC_ID)
-			m = (typeof(m)){"LP9002", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP9002", "PCI", ""};
 		else
-			m = (typeof(m)){"LP9000", "PCI",
-					"Fibre Channel Adapter"};
+			m = (typeof(m)){"LP9000", "PCI", ""};
+		m.function = "Obsolete, Unsupported Fibre Channel Adapter";
 		break;
 	case PCI_DEVICE_ID_RFLY:
 		m = (typeof(m)){"LP952", "PCI",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PEGASUS:
 		m = (typeof(m)){"LP9802", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_THOR:
 		m = (typeof(m)){"LP10000", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_VIPER:
 		m = (typeof(m)){"LPX1000",  "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PFLY:
 		m = (typeof(m)){"LP982", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_TFLY:
 		m = (typeof(m)){"LP1050", "PCI-X",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS:
 		m = (typeof(m)){"LP11000", "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS_SCSP:
 		m = (typeof(m)){"LP11000-SP", "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HELIOS_DCSP:
 		m = (typeof(m)){"LP11002-SP",  "PCI-X2",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE:
-		m = (typeof(m)){"LPe1000", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1000", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE_SCSP:
-		m = (typeof(m)){"LPe1000-SP", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1000-SP", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_NEPTUNE_DCSP:
-		m = (typeof(m)){"LPe1002-SP", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe1002-SP", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_BMID:
 		m = (typeof(m)){"LP1150", "PCI-X2", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_BSMB:
-		m = (typeof(m)){"LP111", "PCI-X2", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP111", "PCI-X2",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_ZEPHYR:
 		m = (typeof(m)){"LPe11000", "PCIe", "Fibre Channel Adapter"};
@@ -2030,16 +2126,20 @@
 		m = (typeof(m)){"LPe111", "PCIe", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP101:
-		m = (typeof(m)){"LP101", "PCI-X", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP101", "PCI-X",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP10000S:
-		m = (typeof(m)){"LP10000-S", "PCI", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP10000-S", "PCI",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LP11000S:
-		m = (typeof(m)){"LP11000-S", "PCI-X2", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LP11000-S", "PCI-X2",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LPE11000S:
-		m = (typeof(m)){"LPe11000-S", "PCIe", "Fibre Channel Adapter"};
+		m = (typeof(m)){"LPe11000-S", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_SAT:
 		m = (typeof(m)){"LPe12000", "PCIe", "Fibre Channel Adapter"};
@@ -2060,20 +2160,21 @@
 		m = (typeof(m)){"LPe12000-S", "PCIe", "Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_HORNET:
-		m = (typeof(m)){"LP21000", "PCIe", "FCoE Adapter"};
+		m = (typeof(m)){"LP21000", "PCIe",
+				"Obsolete, Unsupported FCoE Adapter"};
 		GE = 1;
 		break;
 	case PCI_DEVICE_ID_PROTEUS_VF:
 		m = (typeof(m)){"LPev12000", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PROTEUS_PF:
 		m = (typeof(m)){"LPev12000", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_PROTEUS_S:
 		m = (typeof(m)){"LPemv12002-S", "PCIe IOV",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_TIGERSHARK:
 		oneConnect = 1;
@@ -2089,17 +2190,24 @@
 		break;
 	case PCI_DEVICE_ID_BALIUS:
 		m = (typeof(m)){"LPVe12002", "PCIe Shared I/O",
-				"Fibre Channel Adapter"};
+				"Obsolete, Unsupported Fibre Channel Adapter"};
 		break;
 	case PCI_DEVICE_ID_LANCER_FC:
-	case PCI_DEVICE_ID_LANCER_FC_VF:
 		m = (typeof(m)){"LPe16000", "PCIe", "Fibre Channel Adapter"};
 		break;
+	case PCI_DEVICE_ID_LANCER_FC_VF:
+		m = (typeof(m)){"LPe16000", "PCIe",
+				"Obsolete, Unsupported Fibre Channel Adapter"};
+		break;
 	case PCI_DEVICE_ID_LANCER_FCOE:
-	case PCI_DEVICE_ID_LANCER_FCOE_VF:
 		oneConnect = 1;
 		m = (typeof(m)){"OCe15100", "PCIe", "FCoE"};
 		break;
+	case PCI_DEVICE_ID_LANCER_FCOE_VF:
+		oneConnect = 1;
+		m = (typeof(m)){"OCe15100", "PCIe",
+				"Obsolete, Unsupported FCoE"};
+		break;
 	case PCI_DEVICE_ID_SKYHAWK:
 	case PCI_DEVICE_ID_SKYHAWK_VF:
 		oneConnect = 1;
@@ -4614,7 +4722,10 @@
 		phba->link_state = LPFC_HBA_ERROR;
 		return;
 	}
-	lpfc_offline_prep(phba, LPFC_MBX_WAIT);
+	if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
+		lpfc_offline_prep(phba, LPFC_MBX_WAIT);
+	else
+		lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
 	lpfc_offline(phba);
 	lpfc_sli_brdrestart(phba);
 	lpfc_online(phba);
@@ -9663,9 +9774,6 @@
 static void
 lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
-
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"2723 PCI channel I/O abort preparing for recovery\n");
 
@@ -9673,8 +9781,7 @@
 	 * There may be errored I/Os through HBA, abort all I/Os on txcmplq
 	 * and let the SCSI mid-layer to retry them to recover.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 }
 
 /**
@@ -10417,17 +10524,13 @@
 static void
 lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
-
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"2828 PCI channel I/O abort preparing for recovery\n");
 	/*
 	 * There may be errored I/Os through HBA, abort all I/Os on txcmplq
 	 * and let the SCSI mid-layer to retry them to recover.
 	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 }
 
 /**
@@ -10898,7 +11001,7 @@
 	if (phba->sli4_hba.pc_sli4_params.oas_supported) {
 		phba->cfg_fof = 1;
 	} else {
-		phba->cfg_EnableXLane = 0;
+		phba->cfg_fof = 0;
 		if (phba->device_data_mem_pool)
 			mempool_destroy(phba->device_data_mem_pool);
 		phba->device_data_mem_pool = NULL;
@@ -10928,7 +11031,7 @@
 	if (rc)
 		return -ENOMEM;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		rc = lpfc_cq_create(phba, phba->sli4_hba.oas_cq,
 				    phba->sli4_hba.fof_eq, LPFC_WCQ, LPFC_FCP);
@@ -10947,8 +11050,7 @@
 	return 0;
 
 out_oas_wq:
-	if (phba->cfg_EnableXLane)
-		lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq);
+	lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq);
 out_oas_cq:
 	lpfc_eq_destroy(phba, phba->sli4_hba.fof_eq);
 	return rc;
@@ -10982,7 +11084,7 @@
 
 	phba->sli4_hba.fof_eq = qdesc;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/* Create OAS CQ */
 		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index ed419aa..3fa6533 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2012 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 462453e..2df11da 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -73,7 +73,7 @@
 {
 	struct lpfc_vport *vport = (struct lpfc_vport *)sdev->host->hostdata;
 
-	if (vport->phba->cfg_EnableXLane)
+	if (vport->phba->cfg_fof)
 		return ((struct lpfc_device_data *)sdev->hostdata)->rport_data;
 	else
 		return (struct lpfc_rport_data *)sdev->hostdata;
@@ -3462,7 +3462,7 @@
 	 * If the OAS driver feature is enabled and the lun is enabled for
 	 * OAS, set the oas iocb related flags.
 	 */
-	if ((phba->cfg_EnableXLane) && ((struct lpfc_device_data *)
+	if ((phba->cfg_fof) && ((struct lpfc_device_data *)
 		scsi_cmnd->device->hostdata)->oas_enabled)
 		lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_OAS;
 	return 0;
@@ -4314,6 +4314,7 @@
 		fcp_cmnd->fcpCntl1 = SIMPLE_Q;
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
+	piocbq->iocb.un.fcpi.fcpi_XRdy = 0;
 
 	/*
 	 * There are three possibilities here - use scatter-gather segment, use
@@ -4782,7 +4783,9 @@
 	struct lpfc_scsi_buf *lpfc_cmd;
 	IOCB_t *cmd, *icmd;
 	int ret = SUCCESS, status = 0;
-	unsigned long flags;
+	struct lpfc_sli_ring *pring_s4;
+	int ring_number, ret_val;
+	unsigned long flags, iflags;
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
 
 	status = fc_block_scsi_eh(cmnd);
@@ -4833,6 +4836,14 @@
 
 	BUG_ON(iocb->context1 != lpfc_cmd);
 
+	/* abort issued in recovery is still in progress */
+	if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
+		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+			 "3389 SCSI Layer I/O Abort Request is pending\n");
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto wait_for_cmpl;
+	}
+
 	abtsiocb = __lpfc_sli_get_iocbq(phba);
 	if (abtsiocb == NULL) {
 		ret = FAILED;
@@ -4871,11 +4882,23 @@
 
 	abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
 	abtsiocb->vport = vport;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx;
+		pring_s4 = &phba->sli.ring[ring_number];
+		/* Note: both hbalock and ring_lock must be set here */
+		spin_lock_irqsave(&pring_s4->ring_lock, iflags);
+		ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
+						abtsiocb, 0);
+		spin_unlock_irqrestore(&pring_s4->ring_lock, iflags);
+	} else {
+		ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
+						abtsiocb, 0);
+	}
 	/* no longer need the lock after this point */
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 
-	if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) ==
-	    IOCB_ERROR) {
+
+	if (ret_val == IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, abtsiocb);
 		ret = FAILED;
 		goto out;
@@ -4885,12 +4908,16 @@
 		lpfc_sli_handle_fast_ring_event(phba,
 			&phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ);
 
+wait_for_cmpl:
 	lpfc_cmd->waitq = &waitq;
 	/* Wait for abort to complete */
 	wait_event_timeout(waitq,
 			  (lpfc_cmd->pCmd != cmnd),
 			   msecs_to_jiffies(2*vport->cfg_devloss_tmo*1000));
+
+	spin_lock_irqsave(shost->host_lock, flags);
 	lpfc_cmd->waitq = NULL;
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	if (lpfc_cmd->pCmd == cmnd) {
 		ret = FAILED;
@@ -5172,8 +5199,9 @@
 
 	cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context);
 	if (cnt)
-		lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring],
-				    tgt_id, lun_id, context);
+		lpfc_sli_abort_taskmgmt(vport,
+					&phba->sli.ring[phba->sli.fcp_ring],
+					tgt_id, lun_id, context);
 	later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies;
 	while (time_after(later, jiffies) && cnt) {
 		schedule_timeout_uninterruptible(msecs_to_jiffies(20));
@@ -5491,7 +5519,7 @@
 	if (!rport || fc_remote_port_chkready(rport))
 		return -ENXIO;
 
-	if (phba->cfg_EnableXLane) {
+	if (phba->cfg_fof) {
 
 		/*
 		 * Check to see if the device data structure for the lun
@@ -5616,7 +5644,7 @@
 	struct lpfc_device_data *device_data = sdev->hostdata;
 
 	atomic_dec(&phba->sdev_cnt);
-	if ((phba->cfg_EnableXLane) && (device_data)) {
+	if ((phba->cfg_fof) && (device_data)) {
 		spin_lock_irqsave(&phba->devicelock, flags);
 		device_data->available = false;
 		if (!device_data->oas_enabled)
@@ -5655,7 +5683,7 @@
 	int memory_flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn  ||
-	    !(phba->cfg_EnableXLane))
+	    !(phba->cfg_fof))
 		return NULL;
 
 	/* Attempt to create the device data to contain lun info */
@@ -5693,7 +5721,7 @@
 {
 
 	if (unlikely(!phba) || !lun_info  ||
-	    !(phba->cfg_EnableXLane))
+	    !(phba->cfg_fof))
 		return;
 
 	if (!list_empty(&lun_info->listentry))
@@ -5727,7 +5755,7 @@
 	struct lpfc_device_data *lun_info;
 
 	if (unlikely(!phba) || !list || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return NULL;
 
 	/* Check to see if the lun is already enabled for OAS. */
@@ -5789,7 +5817,7 @@
 	    !starting_lun || !found_vport_wwpn ||
 	    !found_target_wwpn || !found_lun || !found_lun_status ||
 	    (*starting_lun == NO_MORE_OAS_LUN) ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	lun = *starting_lun;
@@ -5873,7 +5901,7 @@
 	unsigned long flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	spin_lock_irqsave(&phba->devicelock, flags);
@@ -5930,7 +5958,7 @@
 	unsigned long flags;
 
 	if (unlikely(!phba) || !vport_wwpn || !target_wwpn ||
-	    !phba->cfg_EnableXLane)
+	    !phba->cfg_fof)
 		return false;
 
 	spin_lock_irqsave(&phba->devicelock, flags);
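
The eh_abort rework above encodes a new SLI-4 invariant: an ABTS must be
posted to the same work queue the original command was issued on, and FCP
rings sit behind the fixed SLI-3 rings in the psli->ring[] array. Hence the
ring index is derived from the command's fcp_wqidx and the issue happens
under that ring's ring_lock (with hbalock already held). In miniature:

    if (phba->sli_rev == LPFC_SLI_REV4) {
            ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx;
            pring_s4 = &phba->sli.ring[ring_number];

            spin_lock_irqsave(&pring_s4->ring_lock, iflags);
            ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
                                            abtsiocb, 0);
            spin_unlock_irqrestore(&pring_s4->ring_lock, iflags);
    } else {
            /* SLI-3: a single FCP ring, hbalock suffices */
            ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
                                            abtsiocb, 0);
    }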
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 0120bfc..0389ac1 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 393662c..32ada05 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -3532,14 +3532,27 @@
 	/* Error everything on txq and txcmplq
 	 * First do the txq.
 	 */
-	spin_lock_irq(&phba->hbalock);
-	list_splice_init(&pring->txq, &completions);
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		spin_lock_irq(&pring->ring_lock);
+		list_splice_init(&pring->txq, &completions);
+		pring->txq_cnt = 0;
+		spin_unlock_irq(&pring->ring_lock);
 
-	/* Next issue ABTS for everything on the txcmplq */
-	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-		lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_lock_irq(&phba->hbalock);
+		/* Next issue ABTS for everything on the txcmplq */
+		list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_unlock_irq(&phba->hbalock);
+	} else {
+		spin_lock_irq(&phba->hbalock);
+		list_splice_init(&pring->txq, &completions);
+		pring->txq_cnt = 0;
 
-	spin_unlock_irq(&phba->hbalock);
+		/* Next issue ABTS for everything on the txcmplq */
+		list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+		spin_unlock_irq(&phba->hbalock);
+	}
 
 	/* Cancel all the IOCBs from the completions list */
 	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
@@ -3547,6 +3560,36 @@
 }
 
 /**
+ * lpfc_sli_abort_fcp_rings - Abort all iocbs in all FCP rings
+ * @phba: Pointer to HBA context object.
+ *
+ * This function aborts all iocbs in the FCP rings and frees all the iocb
+ * objects in txq. It issues an abort iocb for every iocb command on the
+ * txcmplq; those iocbs are not guaranteed to complete before this function
+ * returns. The caller is not required to hold any locks.
+ **/
+void
+lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring  *pring;
+	uint32_t i;
+
+	/* Look on all the FCP Rings for the iotag */
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
+			pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS];
+			lpfc_sli_abort_iocb_ring(phba, pring);
+		}
+	} else {
+		pring = &psli->ring[psli->fcp_ring];
+		lpfc_sli_abort_iocb_ring(phba, pring);
+	}
+}
+
+
+/**
  * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring
  * @phba: Pointer to HBA context object.
  *
@@ -3563,28 +3606,55 @@
 	LIST_HEAD(txcmplq);
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring  *pring;
-
-	/* Currently, only one fcp ring */
-	pring = &psli->ring[psli->fcp_ring];
+	uint32_t i;
 
 	spin_lock_irq(&phba->hbalock);
-	/* Retrieve everything on txq */
-	list_splice_init(&pring->txq, &txq);
-
-	/* Retrieve everything on the txcmplq */
-	list_splice_init(&pring->txcmplq, &txcmplq);
-
 	/* Indicate the I/O queues are flushed */
 	phba->hba_flag |= HBA_FCP_IOQ_FLUSH;
 	spin_unlock_irq(&phba->hbalock);
 
-	/* Flush the txq */
-	lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_DOWN);
+	/* Look on all the FCP Rings for the iotag */
+	if (phba->sli_rev >= LPFC_SLI_REV4) {
+		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
+			pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS];
 
-	/* Flush the txcmpq */
-	lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_DOWN);
+			spin_lock_irq(&pring->ring_lock);
+			/* Retrieve everything on txq */
+			list_splice_init(&pring->txq, &txq);
+			/* Retrieve everything on the txcmplq */
+			list_splice_init(&pring->txcmplq, &txcmplq);
+			pring->txq_cnt = 0;
+			pring->txcmplq_cnt = 0;
+			spin_unlock_irq(&pring->ring_lock);
+
+			/* Flush the txq */
+			lpfc_sli_cancel_iocbs(phba, &txq,
+					      IOSTAT_LOCAL_REJECT,
+					      IOERR_SLI_DOWN);
+			/* Flush the txcmpq */
+			lpfc_sli_cancel_iocbs(phba, &txcmplq,
+					      IOSTAT_LOCAL_REJECT,
+					      IOERR_SLI_DOWN);
+		}
+	} else {
+		pring = &psli->ring[psli->fcp_ring];
+
+		spin_lock_irq(&phba->hbalock);
+		/* Retrieve everything on txq */
+		list_splice_init(&pring->txq, &txq);
+		/* Retrieve everything on the txcmplq */
+		list_splice_init(&pring->txcmplq, &txcmplq);
+		pring->txq_cnt = 0;
+		pring->txcmplq_cnt = 0;
+		spin_unlock_irq(&phba->hbalock);
+
+		/* Flush the txq */
+		lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_DOWN);
+		/* Flush the txcmpq */
+		lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT,
+				      IOERR_SLI_DOWN);
+	}
 }
 
 /**
@@ -3987,12 +4057,13 @@
 {
 	struct lpfc_sli *psli = &phba->sli;
 	uint16_t cfg_value;
-	int rc;
+	int rc = 0;
 
 	/* Reset HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"0295 Reset HBA Data: x%x x%x\n",
-			phba->pport->port_state, psli->sli_flag);
+			"0295 Reset HBA Data: x%x x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag,
+			phba->hba_flag);
 
 	/* perform board reset */
 	phba->fc_eventTag = 0;
@@ -4005,6 +4076,12 @@
 	phba->fcf.fcf_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */
+	if (phba->hba_flag & HBA_FW_DUMP_OP) {
+		phba->hba_flag &= ~HBA_FW_DUMP_OP;
+		return rc;
+	}
+
 	/* Now physically reset the device */
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 			"0389 Performing PCI function reset!\n");
@@ -5002,7 +5079,7 @@
 		} while (++fcp_eqidx < phba->cfg_fcp_io_channel);
 	}
 
-	if (phba->cfg_EnableXLane)
+	if (phba->cfg_fof)
 		lpfc_sli4_cq_release(phba->sli4_hba.oas_cq, LPFC_QUEUE_REARM);
 
 	if (phba->sli4_hba.hba_eq) {
@@ -6722,7 +6799,6 @@
 	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
 	MAILBOX_t *mb = &pmbox->u.mb;
 	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring;
 
 	/* If the mailbox completed, process the completion and return */
 	if (lpfc_sli4_process_missed_mbox_completions(phba))
@@ -6764,8 +6840,7 @@
 	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_sli_abort_fcp_rings(phba);
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 			"0345 Resetting board due to mailbox timeout\n");
@@ -8133,6 +8208,7 @@
 	abort_tag = (uint32_t) iocbq->iotag;
 	xritag = iocbq->sli4_xritag;
 	wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
+	wqe->generic.wqe_com.word10 = 0;
 	/* words0-2 bpl convert bde */
 	if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
 		numBdes = iocbq->iocb.un.genreq64.bdl.bdeSize /
@@ -8639,8 +8715,7 @@
 
 	if ((piocb->iocb_flag & LPFC_IO_FCP) ||
 	    (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-		if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag &
-			LPFC_IO_OAS))) {
+		if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) {
 			wq = phba->sli4_hba.fcp_wq[piocb->fcp_wqidx];
 		} else {
 			wq = phba->sli4_hba.oas_wq;
@@ -8735,7 +8810,7 @@
 
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		if (piocb->iocb_flag &  LPFC_IO_FCP) {
-			if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag &
+			if (!phba->cfg_fof || (!(piocb->iocb_flag &
 				LPFC_IO_OAS))) {
 				if (unlikely(!phba->sli4_hba.fcp_wq))
 					return IOCB_ERROR;
@@ -9170,6 +9245,7 @@
 		pring->sli.sli3.next_cmdidx  = 0;
 		pring->sli.sli3.local_getidx = 0;
 		pring->sli.sli3.cmdidx = 0;
+		pring->flag = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -9805,43 +9881,6 @@
 }
 
 /**
- * lpfc_sli_iocb_ring_abort - Unconditionally abort all iocbs on an iocb ring
- * @phba: Pointer to HBA context object.
- * @pring: Pointer to driver SLI ring object.
- *
- * This function aborts all iocbs in the given ring and frees all the iocb
- * objects in txq. This function issues abort iocbs unconditionally for all
- * the iocb commands in txcmplq. The iocbs in the txcmplq is not guaranteed
- * to complete before the return of this function. The caller is not required
- * to hold any locks.
- **/
-static void
-lpfc_sli_iocb_ring_abort(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
-{
-	LIST_HEAD(completions);
-	struct lpfc_iocbq *iocb, *next_iocb;
-
-	if (pring->ringno == LPFC_ELS_RING)
-		lpfc_fabric_abort_hba(phba);
-
-	spin_lock_irq(&phba->hbalock);
-
-	/* Take off all the iocbs on txq for cancelling */
-	list_splice_init(&pring->txq, &completions);
-	pring->txq_cnt = 0;
-
-	/* Next issue ABTS for everything on the txcmplq */
-	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-		lpfc_sli_abort_iotag_issue(phba, pring, iocb);
-
-	spin_unlock_irq(&phba->hbalock);
-
-	/* Cancel all the IOCBs from the completions list */
-	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
-			      IOERR_SLI_ABORTED);
-}
-
-/**
  * lpfc_sli_hba_iocb_abort - Abort all iocbs to an hba.
  * @phba: pointer to lpfc HBA data structure.
  *
@@ -9856,7 +9895,7 @@
 
 	for (i = 0; i < psli->num_rings; i++) {
 		pring = &psli->ring[i];
-		lpfc_sli_iocb_ring_abort(phba, pring);
+		lpfc_sli_abort_iocb_ring(phba, pring);
 	}
 }
 
@@ -10081,6 +10120,124 @@
 }
 
 /**
+ * lpfc_sli_abort_taskmgmt - issue abort for all commands on a host/target/LUN
+ * @vport: Pointer to virtual port.
+ * @pring: Pointer to driver SLI ring object.
+ * @tgt_id: SCSI ID of the target.
+ * @lun_id: LUN ID of the scsi device.
+ * @cmd: LPFC_CTX_LUN/LPFC_CTX_TGT/LPFC_CTX_HOST.
+ *
+ * This function sends an abort command for every SCSI command
+ * associated with the given virtual port pending on the ring
+ * filtered by the lpfc_sli_validate_fcp_iocb function.
+ * When cmd == LPFC_CTX_LUN, the function sends an abort only to the
+ * FCP iocbs associated with the lun specified by the tgt_id and lun_id
+ * parameters.
+ * When cmd == LPFC_CTX_TGT, the function sends an abort only to the
+ * FCP iocbs associated with the SCSI target specified by the tgt_id
+ * parameter.
+ * When cmd == LPFC_CTX_HOST, the function sends an abort to all
+ * FCP iocbs associated with the virtual port.
+ * This function returns the number of iocbs it aborted.
+ * It is called with no locks held, right after a task management
+ * command is sent.
+ **/
+int
+lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
+			uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_iocbq *abtsiocbq;
+	struct lpfc_iocbq *iocbq;
+	IOCB_t *icmd;
+	int sum, i, ret_val;
+	unsigned long iflags;
+	struct lpfc_sli_ring *pring_s4;
+	uint32_t ring_number;
+
+	spin_lock_irq(&phba->hbalock);
+
+	/* all I/Os are in process of being flushed */
+	if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
+		spin_unlock_irq(&phba->hbalock);
+		return 0;
+	}
+	sum = 0;
+
+	for (i = 1; i <= phba->sli.last_iotag; i++) {
+		iocbq = phba->sli.iocbq_lookup[i];
+
+		if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id,
+					       cmd) != 0)
+			continue;
+
+		/*
+		 * If the iocbq is already being aborted, don't take a second
+		 * action; just skip it.
+		 */
+		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+			continue;
+
+		/* issue ABTS for this IOCB based on iotag */
+		abtsiocbq = __lpfc_sli_get_iocbq(phba);
+		if (abtsiocbq == NULL)
+			continue;
+
+		icmd = &iocbq->iocb;
+		abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
+		abtsiocbq->iocb.un.acxri.abortContextTag = icmd->ulpContext;
+		if (phba->sli_rev == LPFC_SLI_REV4)
+			abtsiocbq->iocb.un.acxri.abortIoTag =
+							 iocbq->sli4_xritag;
+		else
+			abtsiocbq->iocb.un.acxri.abortIoTag = icmd->ulpIoTag;
+		abtsiocbq->iocb.ulpLe = 1;
+		abtsiocbq->iocb.ulpClass = icmd->ulpClass;
+		abtsiocbq->vport = vport;
+
+		/* ABTS WQE must go to the same WQ as the WQE to be aborted */
+		abtsiocbq->fcp_wqidx = iocbq->fcp_wqidx;
+		if (iocbq->iocb_flag & LPFC_IO_FCP)
+			abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX;
+
+		if (lpfc_is_link_up(phba))
+			abtsiocbq->iocb.ulpCommand = CMD_ABORT_XRI_CN;
+		else
+			abtsiocbq->iocb.ulpCommand = CMD_CLOSE_XRI_CN;
+
+		/* Setup callback routine and issue the command. */
+		abtsiocbq->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
+
+		/*
+		 * Indicate the IO is being aborted by the driver and set
+		 * the caller's flag into the aborted IO.
+		 */
+		iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
+
+		if (phba->sli_rev == LPFC_SLI_REV4) {
+			ring_number = MAX_SLI3_CONFIGURED_RINGS +
+					 iocbq->fcp_wqidx;
+			pring_s4 = &phba->sli.ring[ring_number];
+			/* Note: both hbalock and ring_lock must be set here */
+			spin_lock_irqsave(&pring_s4->ring_lock, iflags);
+			ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
+							abtsiocbq, 0);
+			spin_unlock_irqrestore(&pring_s4->ring_lock, iflags);
+		} else {
+			ret_val = __lpfc_sli_issue_iocb(phba, pring->ringno,
+							abtsiocbq, 0);
+		}
+
+
+		if (ret_val == IOCB_ERROR)
+			__lpfc_sli_release_iocbq(phba, abtsiocbq);
+		else
+			sum++;
+	}
+	spin_unlock_irq(&phba->hbalock);
+	return sum;
+}
+
+/**
  * lpfc_sli_wake_iocb_wait - lpfc_sli_issue_iocb_wait's completion handler
  * @phba: Pointer to HBA context object.
  * @cmdiocbq: Pointer to command iocb.
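
Taken together, lpfc_sli_abort_taskmgmt above is a filtered walk of the
iotag lookup table: match the host/target/LUN context, skip anything
already being aborted, mark and abort the rest, and return how many aborts
were actually issued so the task-management caller can poll for the drain.
Skeleton only -- is_match() and issue_abts() stand in for
lpfc_sli_validate_fcp_iocb() and the ABTS construction:

    sum = 0;
    spin_lock_irq(&phba->hbalock);
    for (i = 1; i <= phba->sli.last_iotag; i++) {
            iocbq = phba->sli.iocbq_lookup[i];

            if (!is_match(iocbq, vport, tgt_id, lun_id, cmd))
                    continue;
            if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
                    continue;               /* abort already in flight */

            iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
            if (issue_abts(phba, iocbq) != IOCB_ERROR)
                    sum++;                  /* caller polls until drained */
    }
    spin_unlock_irq(&phba->hbalock);
    return sum;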
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 6f04080..edb4883 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 9b8cda8..7f50aa0 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2009-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index e32cbec..41675c1 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2013 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.45"
+#define LPFC_DRIVER_VERSION "10.2.8001.0."
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
@@ -30,4 +30,4 @@
 
 #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \
 		LPFC_DRIVER_VERSION
-#define LPFC_COPYRIGHT "Copyright(c) 2004-2013 Emulex.  All rights reserved."
+#define LPFC_COPYRIGHT "Copyright(c) 2004-2014 Emulex.  All rights reserved."
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 1fa0104..de5d0ae 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -1648,16 +1648,16 @@
  */
 struct crc_context {
 	uint32_t handle;		/* System handle. */
-	uint32_t ref_tag;
-	uint16_t app_tag;
+	__le32 ref_tag;
+	__le16 app_tag;
 	uint8_t ref_tag_mask[4];	/* Validation/Replacement Mask*/
 	uint8_t app_tag_mask[2];	/* Validation/Replacement Mask*/
-	uint16_t guard_seed;		/* Initial Guard Seed */
-	uint16_t prot_opts;		/* Requested Data Protection Mode */
-	uint16_t blk_size;		/* Data size in bytes */
+	__le16 guard_seed;		/* Initial Guard Seed */
+	__le16 prot_opts;		/* Requested Data Protection Mode */
+	__le16 blk_size;		/* Data size in bytes */
 	uint16_t runt_blk_guard;	/* Guard value for runt block (tape
 					 * only) */
-	uint32_t byte_count;		/* Total byte count/ total data
+	__le32 byte_count;		/* Total byte count/ total data
 					 * transfer count */
 	union {
 		struct {
@@ -1671,10 +1671,10 @@
 			uint32_t	reserved_6;
 		} nobundling;
 		struct {
-			uint32_t	dif_byte_count;	/* Total DIF byte
+			__le32	dif_byte_count;	/* Total DIF byte
 							 * count */
 			uint16_t	reserved_1;
-			uint16_t	dseg_count;	/* Data segment count */
+			__le16	dseg_count;	/* Data segment count */
 			uint32_t	reserved_2;
 			uint32_t	data_address[2];
 			uint32_t	data_length;
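
The qla2xxx re-typing above changes visibility, not layout: __le16/__le32
occupy the same bytes as uint16_t/uint32_t, but sparse ("make C=1") now
flags any access that bypasses the cpu_to_le*/le*_to_cpu converters, which
is what keeps these DMA descriptors correct on big-endian hosts. Intended
usage in miniature (illustrative subset of the struct):

    struct crc_ctx_fields {
            __le32 ref_tag;
            __le16 app_tag;
            __le16 blk_size;
    };

    static void fill_ctx(struct crc_ctx_fields *ctx, u32 lba, u16 bs)
    {
            ctx->ref_tag  = cpu_to_le32(lba);   /* bare store would warn */
            ctx->app_tag  = 0;                  /* zero is endian-neutral */
            ctx->blk_size = cpu_to_le16(bs);
    }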
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index b1d10f9..4b188b0 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -104,7 +104,6 @@
 /*
  * Global Variables
  */
-static struct kmem_cache *qla_tgt_cmd_cachep;
 static struct kmem_cache *qla_tgt_mgmt_cmd_cachep;
 static mempool_t *qla_tgt_mgmt_cmd_mempool;
 static struct workqueue_struct *qla_tgt_wq;
@@ -1997,7 +1996,7 @@
 	 * have been implemented by TCM, before AppTag is avail.
 	 * Look for modesense_handlers[]
 	 */
-	ctx->app_tag = __constant_cpu_to_le16(0);
+	ctx->app_tag = 0;
 	ctx->app_tag_mask[0] = 0x0;
 	ctx->app_tag_mask[1] = 0x0;
 
@@ -2079,6 +2078,7 @@
 	struct se_cmd		*se_cmd = &cmd->se_cmd;
 	uint32_t h;
 	struct atio_from_isp *atio = &prm->cmd->atio;
+	uint16_t t16;
 
 	sgc = 0;
 	ha = vha->hw;
@@ -2175,8 +2175,13 @@
 	pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1];
 	pkt->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0];
 	pkt->exchange_addr   = atio->u.isp24.exchange_addr;
-	pkt->ox_id  = swab16(atio->u.isp24.fcp_hdr.ox_id);
-	pkt->flags |= (atio->u.isp24.attr << 9);
+
+	/* silence compile warning */
+	t16 = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id);
+	pkt->ox_id  = cpu_to_le16(t16);
+
+	t16 = (atio->u.isp24.attr << 9);
+	pkt->flags |= cpu_to_le16(t16);
 	pkt->relative_offset = cpu_to_le32(prm->cmd->offset);
 
 	/* Set transfer direction */
@@ -2251,8 +2256,7 @@
 
 	if (bundling && prm->prot_seg_cnt) {
 		/* Walks dif segments */
-		pkt->add_flags |=
-			__constant_cpu_to_le16(CTIO_CRC2_AF_DIF_DSD_ENA);
+		pkt->add_flags |= CTIO_CRC2_AF_DIF_DSD_ENA;
 
 		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
 		if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
@@ -2705,6 +2709,8 @@
 
 void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 {
+	struct qla_tgt_sess *sess = cmd->sess;
+
 	ql_dbg(ql_dbg_tgt, cmd->vha, 0xe074,
 	    "%s: se_cmd[%p] ox_id %04x\n",
 	    __func__, &cmd->se_cmd,
@@ -2713,7 +2719,12 @@
 	BUG_ON(cmd->sg_mapped);
 	if (unlikely(cmd->free_sg))
 		kfree(cmd->sg);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
+
+	if (!sess || !sess->se_sess) {
+		WARN_ON(1);
+		return;
+	}
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
 }
 EXPORT_SYMBOL(qlt_free_cmd);
 
@@ -3075,13 +3086,12 @@
 /*
  * Process context for I/O path into tcm_qla2xxx code
  */
-static void qlt_do_work(struct work_struct *work)
+static void __qlt_do_work(struct qla_tgt_cmd *cmd)
 {
-	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
 	scsi_qla_host_t *vha = cmd->vha;
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
-	struct qla_tgt_sess *sess = NULL;
+	struct qla_tgt_sess *sess = cmd->sess;
 	struct atio_from_isp *atio = &cmd->atio;
 	unsigned char *cdb;
 	unsigned long flags;
@@ -3091,41 +3101,6 @@
 	if (tgt->tgt_stop)
 		goto out_term;
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha,
-	    atio->u.isp24.fcp_hdr.s_id);
-	/* Do kref_get() before dropping qla_hw_data->hardware_lock. */
-	if (sess)
-		kref_get(&sess->se_sess->sess_kref);
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
-	if (unlikely(!sess)) {
-		uint8_t *s_id =	atio->u.isp24.fcp_hdr.s_id;
-
-		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
-			"qla_target(%d): Unable to find wwn login"
-			" (s_id %x:%x:%x), trying to create it manually\n",
-			vha->vp_idx, s_id[0], s_id[1], s_id[2]);
-
-		if (atio->u.raw.entry_count > 1) {
-			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
-				"Dropping multy entry cmd %p\n", cmd);
-			goto out_term;
-		}
-
-		mutex_lock(&vha->vha_tgt.tgt_mutex);
-		sess = qlt_make_local_sess(vha, s_id);
-		/* sess has an extra creation ref. */
-		mutex_unlock(&vha->vha_tgt.tgt_mutex);
-
-		if (!sess)
-			goto out_term;
-	}
-
-	cmd->sess = sess;
-	cmd->loop_id = sess->loop_id;
-	cmd->conf_compl_supported = sess->conf_compl_supported;
-
 	cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
 	cmd->tag = atio->u.isp24.exchange_addr;
 	cmd->unpacked_lun = scsilun_to_int(
@@ -3153,8 +3128,8 @@
 		cmd, &cmd->se_cmd, cmd->unpacked_lun, cmd->tag, data_length,
 		cmd->atio.u.isp24.fcp_hdr.ox_id);
 
-	ret = vha->hw->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
-	    fcp_task_attr, data_dir, bidi);
+	ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length,
+				          fcp_task_attr, data_dir, bidi);
 	if (ret != 0)
 		goto out_term;
 	/*
@@ -3173,17 +3148,114 @@
 	 */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	qlt_send_term_exchange(vha, NULL, &cmd->atio, 1);
-	kmem_cache_free(qla_tgt_cmd_cachep, cmd);
-	if (sess)
-		ha->tgt.tgt_ops->put_sess(sess);
+	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	ha->tgt.tgt_ops->put_sess(sess);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
+static void qlt_do_work(struct work_struct *work)
+{
+	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+
+	__qlt_do_work(cmd);
+}
+
+static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
+				       struct qla_tgt_sess *sess,
+				       struct atio_from_isp *atio)
+{
+	struct se_session *se_sess = sess->se_sess;
+	struct qla_tgt_cmd *cmd;
+	int tag;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	if (tag < 0)
+		return NULL;
+
+	cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag];
+	memset(cmd, 0, sizeof(struct qla_tgt_cmd));
+
+	memcpy(&cmd->atio, atio, sizeof(*atio));
+	cmd->state = QLA_TGT_STATE_NEW;
+	cmd->tgt = vha->vha_tgt.qla_tgt;
+	cmd->vha = vha;
+	cmd->se_cmd.map_tag = tag;
+	cmd->sess = sess;
+	cmd->loop_id = sess->loop_id;
+	cmd->conf_compl_supported = sess->conf_compl_supported;
+
+	return cmd;
+}
+
+static void qlt_send_busy(struct scsi_qla_host *, struct atio_from_isp *,
+			  uint16_t);
+
+static void qlt_create_sess_from_atio(struct work_struct *work)
+{
+	struct qla_tgt_sess_op *op = container_of(work,
+					struct qla_tgt_sess_op, work);
+	scsi_qla_host_t *vha = op->vha;
+	struct qla_hw_data *ha = vha->hw;
+	struct qla_tgt_sess *sess;
+	struct qla_tgt_cmd *cmd;
+	unsigned long flags;
+	uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id;
+
+	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022,
+		"qla_target(%d): Unable to find wwn login"
+		" (s_id %x:%x:%x), trying to create it manually\n",
+		vha->vp_idx, s_id[0], s_id[1], s_id[2]);
+
+	if (op->atio.u.raw.entry_count > 1) {
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023,
+			"Dropping multi entry atio %p\n", &op->atio);
+		goto out_term;
+	}
+
+	mutex_lock(&vha->vha_tgt.tgt_mutex);
+	sess = qlt_make_local_sess(vha, s_id);
+	/* sess has an extra creation ref. */
+	mutex_unlock(&vha->vha_tgt.tgt_mutex);
+
+	if (!sess)
+		goto out_term;
+	/*
+	 * Now obtain a pre-allocated session tag using the original op->atio
+	 * packet header, and dispatch into __qlt_do_work() using the existing
+	 * process context.
+	 */
+	cmd = qlt_get_tag(vha, sess, &op->atio);
+	if (!cmd) {
+		spin_lock_irqsave(&ha->hardware_lock, flags);
+		qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY);
+		ha->tgt.tgt_ops->put_sess(sess);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		kfree(op);
+		return;
+	}
+	/*
+	 * __qlt_do_work() will call ha->tgt.tgt_ops->put_sess() to release
+	 * the extra reference taken above by qlt_make_local_sess()
+	 */
+	__qlt_do_work(cmd);
+	kfree(op);
+	return;
+
+out_term:
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+	qlt_send_term_exchange(vha, NULL, &op->atio, 1);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+	kfree(op);
+}
+
 /* ha->hardware_lock supposed to be held on entry */
 static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio)
 {
+	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+	struct qla_tgt_sess *sess;
 	struct qla_tgt_cmd *cmd;
 
 	if (unlikely(tgt->tgt_stop)) {
@@ -3192,18 +3264,31 @@
 		return -EFAULT;
 	}
 
-	cmd = kmem_cache_zalloc(qla_tgt_cmd_cachep, GFP_ATOMIC);
+	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, atio->u.isp24.fcp_hdr.s_id);
+	if (unlikely(!sess)) {
+		struct qla_tgt_sess_op *op = kzalloc(sizeof(struct qla_tgt_sess_op),
+						     GFP_ATOMIC);
+		if (!op)
+			return -ENOMEM;
+
+		memcpy(&op->atio, atio, sizeof(*atio));
+		INIT_WORK(&op->work, qlt_create_sess_from_atio);
+		queue_work(qla_tgt_wq, &op->work);
+		return 0;
+	}
+	/*
+	 * Do kref_get() before returning + dropping qla_hw_data->hardware_lock.
+	 */
+	kref_get(&sess->se_sess->sess_kref);
+
+	cmd = qlt_get_tag(vha, sess, atio);
 	if (!cmd) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05e,
 		    "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx);
+		ha->tgt.tgt_ops->put_sess(sess);
 		return -ENOMEM;
 	}
 
-	memcpy(&cmd->atio, atio, sizeof(*atio));
-	cmd->state = QLA_TGT_STATE_NEW;
-	cmd->tgt = vha->vha_tgt.qla_tgt;
-	cmd->vha = vha;
-
 	INIT_WORK(&cmd->work, qlt_do_work);
 	queue_work(qla_tgt_wq, &cmd->work);
 	return 0;
@@ -5501,23 +5586,13 @@
 	if (!QLA_TGT_MODE_ENABLED())
 		return 0;
 
-	qla_tgt_cmd_cachep = kmem_cache_create("qla_tgt_cmd_cachep",
-	    sizeof(struct qla_tgt_cmd), __alignof__(struct qla_tgt_cmd), 0,
-	    NULL);
-	if (!qla_tgt_cmd_cachep) {
-		ql_log(ql_log_fatal, NULL, 0xe06c,
-		    "kmem_cache_create for qla_tgt_cmd_cachep failed\n");
-		return -ENOMEM;
-	}
-
 	qla_tgt_mgmt_cmd_cachep = kmem_cache_create("qla_tgt_mgmt_cmd_cachep",
 	    sizeof(struct qla_tgt_mgmt_cmd), __alignof__(struct
 	    qla_tgt_mgmt_cmd), 0, NULL);
 	if (!qla_tgt_mgmt_cmd_cachep) {
 		ql_log(ql_log_fatal, NULL, 0xe06d,
 		    "kmem_cache_create for qla_tgt_mgmt_cmd_cachep failed\n");
-		ret = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	qla_tgt_mgmt_cmd_mempool = mempool_create(25, mempool_alloc_slab,
@@ -5545,8 +5620,6 @@
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 out_mgmt_cmd_cachep:
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-out:
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 	return ret;
 }
 
@@ -5558,5 +5631,4 @@
 	destroy_workqueue(qla_tgt_wq);
 	mempool_destroy(qla_tgt_mgmt_cmd_mempool);
 	kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep);
-	kmem_cache_destroy(qla_tgt_cmd_cachep);
 }
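
With the qla_tgt_cmd kmem_cache gone, command descriptors come from a per-session pre-allocated map: qlt_get_tag() pops an index from se_sess->sess_tag_pool and reuses the matching slot in sess_cmd_map, so the I/O path never calls the slab allocator. A rough userspace analogue of that pattern, with a plain stack standing in for percpu_ida (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_cmd {
	int tag;
	char cdb[16];
};

struct demo_sess {
	struct demo_cmd *cmd_map;	/* sess_cmd_map stand-in            */
	int *free_tags;			/* sess_tag_pool stand-in (a stack) */
	int top;
};

static int demo_sess_init(struct demo_sess *s, int depth)
{
	s->cmd_map = calloc(depth, sizeof(*s->cmd_map));
	s->free_tags = malloc(depth * sizeof(int));
	if (!s->cmd_map || !s->free_tags)
		return -1;		/* error unwind elided in this demo */
	for (int i = 0; i < depth; i++)
		s->free_tags[i] = i;
	s->top = depth;
	return 0;
}

static struct demo_cmd *demo_get_tag(struct demo_sess *s)
{
	struct demo_cmd *cmd;

	if (s->top == 0)
		return NULL;		/* pool exhausted: caller sends BUSY */
	cmd = &s->cmd_map[s->free_tags[--s->top]];
	memset(cmd, 0, sizeof(*cmd));	/* mirrors the memset in qlt_get_tag */
	cmd->tag = s->free_tags[s->top];
	return cmd;
}

static void demo_free_tag(struct demo_sess *s, struct demo_cmd *cmd)
{
	s->free_tags[s->top++] = cmd->tag;	/* percpu_ida_free stand-in */
}

int main(void)
{
	struct demo_sess s;
	struct demo_cmd *cmd;

	if (demo_sess_init(&s, 4))
		return 1;
	cmd = demo_get_tag(&s);
	printf("got tag %d, %d tags left\n", cmd->tag, s.top);
	demo_free_tag(&s, cmd);
	return 0;
}
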
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index f873e10..e0a58fd 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -316,7 +316,7 @@
 	uint8_t  seq_id;
 	uint8_t  df_ctl;
 	uint16_t seq_cnt;
-	uint16_t ox_id;
+	__be16   ox_id;
 	uint16_t rx_id;
 	uint32_t parameter;
 } __packed;
@@ -441,7 +441,7 @@
 	union {
 		struct {
 			uint16_t reserved1;
-			uint16_t flags;
+			__le16 flags;
 			uint32_t residual;
 			uint16_t ox_id;
 			uint16_t scsi_status;
@@ -527,7 +527,7 @@
 
 	uint32_t handle;		/* System handle. */
 	uint16_t nport_handle;		/* N_PORT handle. */
-	uint16_t timeout;		/* Command timeout. */
+	__le16 timeout;		/* Command timeout. */
 
 	uint16_t dseg_count;		/* Data segment count. */
 	uint8_t  vp_index;
@@ -538,15 +538,15 @@
 	uint8_t  reserved1;
 	uint32_t exchange_addr;		/* rcv exchange address */
 	uint16_t reserved2;
-	uint16_t flags;			/* refer to CTIO7 flags values */
+	__le16 flags;			/* refer to CTIO7 flags values */
 	uint32_t residual;
-	uint16_t ox_id;
+	__le16 ox_id;
 	uint16_t scsi_status;
-	uint32_t relative_offset;
+	__le32 relative_offset;
 	uint32_t reserved5;
-	uint32_t transfer_length;		/* total fc transfer length */
+	__le32 transfer_length;		/* total fc transfer length */
 	uint32_t reserved6;
-	uint32_t crc_context_address[2];/* Data segment address. */
+	__le32 crc_context_address[2];/* Data segment address. */
 	uint16_t crc_context_len;	/* Data segment length. */
 	uint16_t reserved_1;		/* MUST be set to 0. */
 } __packed;
@@ -870,6 +870,12 @@
 	struct list_head tgt_list_entry;
 };
 
+struct qla_tgt_sess_op {
+	struct scsi_qla_host *vha;
+	struct atio_from_isp atio;
+	struct work_struct work;
+};
+
 /*
  * Equivalent to IT Nexus (Initiator-Target)
  */
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 896cb23..e2beab9 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1501,6 +1501,8 @@
 	struct qla_tgt_sess *sess = qla_tgt_sess;
 	unsigned char port_name[36];
 	unsigned long flags;
+	int num_tags = (ha->fw_xcb_count) ? ha->fw_xcb_count :
+		       TCM_QLA2XXX_DEFAULT_TAGS;
 
 	lport = vha->vha_tgt.target_lport_ptr;
 	if (!lport) {
@@ -1518,7 +1520,9 @@
 	}
 	se_tpg = &tpg->se_tpg;
 
-	se_sess = transport_init_session(TARGET_PROT_NORMAL);
+	se_sess = transport_init_session_tags(num_tags,
+					      sizeof(struct qla_tgt_cmd),
+					      TARGET_PROT_NORMAL);
 	if (IS_ERR(se_sess)) {
 		pr_err("Unable to initialize struct se_session\n");
 		return PTR_ERR(se_sess);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index 33aaac8..10c0021 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -4,6 +4,11 @@
 #define TCM_QLA2XXX_VERSION	"v0.1"
 /* length of ASCII WWPNs including pad */
 #define TCM_QLA2XXX_NAMELEN	32
+/*
+ * Number of pre-allocated per-session tags, based upon the worst-case
+ * per-port number of IOCBs
+ */
+#define TCM_QLA2XXX_DEFAULT_TAGS 2088
 
 #include "qla_target.h"
 
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 99fdb94..89ee592 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -23,6 +23,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_scsi.h>
 #include <linux/cpu.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
@@ -37,6 +38,7 @@
 	struct completion *comp;
 	union {
 		struct virtio_scsi_cmd_req       cmd;
+		struct virtio_scsi_cmd_req_pi    cmd_pi;
 		struct virtio_scsi_ctrl_tmf_req  tmf;
 		struct virtio_scsi_ctrl_an_req   an;
 	} req;
@@ -399,7 +401,7 @@
 			    size_t req_size, size_t resp_size)
 {
 	struct scsi_cmnd *sc = cmd->sc;
-	struct scatterlist *sgs[4], req, resp;
+	struct scatterlist *sgs[6], req, resp;
 	struct sg_table *out, *in;
 	unsigned out_num = 0, in_num = 0;
 
@@ -417,16 +419,24 @@
 	sgs[out_num++] = &req;
 
 	/* Data-out buffer.  */
-	if (out)
+	if (out) {
+		/* Place WRITE protection SGLs before Data OUT payload */
+		if (scsi_prot_sg_count(sc))
+			sgs[out_num++] = scsi_prot_sglist(sc);
 		sgs[out_num++] = out->sgl;
+	}
 
 	/* Response header.  */
 	sg_init_one(&resp, &cmd->resp, resp_size);
 	sgs[out_num + in_num++] = &resp;
 
 	/* Data-in buffer */
-	if (in)
+	if (in) {
+		/* Place READ protection SGLs before Data IN payload */
+		if (scsi_prot_sg_count(sc))
+			sgs[out_num + in_num++] = scsi_prot_sglist(sc);
 		sgs[out_num + in_num++] = in->sgl;
+	}
 
 	return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_ATOMIC);
 }
@@ -451,12 +461,45 @@
 	return err;
 }
 
+static void virtio_scsi_init_hdr(struct virtio_scsi_cmd_req *cmd,
+				 struct scsi_cmnd *sc)
+{
+	cmd->lun[0] = 1;
+	cmd->lun[1] = sc->device->id;
+	cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
+	cmd->lun[3] = sc->device->lun & 0xff;
+	cmd->tag = (unsigned long)sc;
+	cmd->task_attr = VIRTIO_SCSI_S_SIMPLE;
+	cmd->prio = 0;
+	cmd->crn = 0;
+}
+
+static void virtio_scsi_init_hdr_pi(struct virtio_scsi_cmd_req_pi *cmd_pi,
+				    struct scsi_cmnd *sc)
+{
+	struct request *rq = sc->request;
+	struct blk_integrity *bi;
+
+	virtio_scsi_init_hdr((struct virtio_scsi_cmd_req *)cmd_pi, sc);
+
+	if (!rq || !scsi_prot_sg_count(sc))
+		return;
+
+	bi = blk_get_integrity(rq->rq_disk);
+
+	if (sc->sc_data_direction == DMA_TO_DEVICE)
+		cmd_pi->pi_bytesout = blk_rq_sectors(rq) * bi->tuple_size;
+	else if (sc->sc_data_direction == DMA_FROM_DEVICE)
+		cmd_pi->pi_bytesin = blk_rq_sectors(rq) * bi->tuple_size;
+}
+
 static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 				 struct virtio_scsi_vq *req_vq,
 				 struct scsi_cmnd *sc)
 {
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
 	struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
+	int req_size;
 
 	BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
 
@@ -468,22 +511,20 @@
 
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->sc = sc;
-	cmd->req.cmd = (struct virtio_scsi_cmd_req){
-		.lun[0] = 1,
-		.lun[1] = sc->device->id,
-		.lun[2] = (sc->device->lun >> 8) | 0x40,
-		.lun[3] = sc->device->lun & 0xff,
-		.tag = (unsigned long)sc,
-		.task_attr = VIRTIO_SCSI_S_SIMPLE,
-		.prio = 0,
-		.crn = 0,
-	};
 
 	BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
-	memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
 
-	if (virtscsi_kick_cmd(req_vq, cmd,
-			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd) != 0)
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_T10_PI)) {
+		virtio_scsi_init_hdr_pi(&cmd->req.cmd_pi, sc);
+		memcpy(cmd->req.cmd_pi.cdb, sc->cmnd, sc->cmd_len);
+		req_size = sizeof(cmd->req.cmd_pi);
+	} else {
+		virtio_scsi_init_hdr(&cmd->req.cmd, sc);
+		memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
+		req_size = sizeof(cmd->req.cmd);
+	}
+
+	if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0)
 		return SCSI_MLQUEUE_HOST_BUSY;
 	return 0;
 }
@@ -820,7 +861,7 @@
 {
 	struct Scsi_Host *shost;
 	struct virtio_scsi *vscsi;
-	int err;
+	int err, host_prot;
 	u32 sg_elems, num_targets;
 	u32 cmd_per_lun;
 	u32 num_queues;
@@ -870,6 +911,16 @@
 	shost->max_id = num_targets;
 	shost->max_channel = 0;
 	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
+
+	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_T10_PI)) {
+		host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION |
+			    SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION |
+			    SHOST_DIX_TYPE2_PROTECTION | SHOST_DIX_TYPE3_PROTECTION;
+
+		scsi_host_set_prot(shost, host_prot);
+		scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+	}
+
 	err = scsi_add_host(shost, &vdev->dev);
 	if (err)
 		goto scsi_add_host_failed;
@@ -939,6 +990,7 @@
 static unsigned int features[] = {
 	VIRTIO_SCSI_F_HOTPLUG,
 	VIRTIO_SCSI_F_CHANGE,
+	VIRTIO_SCSI_F_T10_PI,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
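
The pi_bytesout/pi_bytesin fields that virtio_scsi_init_hdr_pi() fills are plain arithmetic: the number of 512-byte sectors in the request times the integrity tuple size. A sketch of that accounting, assuming the common 8-byte T10 DIF tuple (values illustrative):

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT	9	/* blk_rq_sectors() counts 512-byte units */

static uint32_t demo_pi_bytes(uint64_t data_bytes, uint32_t tuple_size)
{
	uint64_t sectors = data_bytes >> SECTOR_SHIFT;

	return (uint32_t)(sectors * tuple_size);
}

int main(void)
{
	/* a 64 KiB WRITE carrying 8-byte tuples: 128 sectors * 8 = 1024 */
	printf("pi_bytesout = %u\n", demo_pi_bytes(64 * 1024, 8));
	return 0;
}
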
diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
index ded31ea..cbf455d 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c
@@ -396,7 +396,7 @@
 	}
 }
 
-static struct vb2_ops iss_video_vb2ops = {
+static const struct vb2_ops iss_video_vb2ops = {
 	.queue_setup	= iss_video_queue_setup,
 	.buf_prepare	= iss_video_buf_prepare,
 	.buf_queue	= iss_video_buf_queue,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 9189bc0..5663f4d 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -300,7 +300,7 @@
 		port = ntohs(sock_in->sin_port);
 	}
 
-	if ((ip_match == true) && (np->np_port == port) &&
+	if (ip_match && (np->np_port == port) &&
 	    (np->np_network_transport == network_transport))
 		return true;
 
@@ -325,7 +325,7 @@
 		}
 
 		match = iscsit_check_np_match(sockaddr, np, network_transport);
-		if (match == true) {
+		if (match) {
 			/*
 			 * Increment the np_exports reference count now to
 			 * prevent iscsit_del_np() below from being called
@@ -1121,7 +1121,7 @@
 	/*
 	 * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes.
 	 */
-	if (dump_payload == true)
+	if (dump_payload)
 		goto after_immediate_data;
 
 	immed_ret = iscsit_handle_immediate_data(cmd, hdr,
@@ -3390,7 +3390,9 @@
 
 #define SENDTARGETS_BUF_LIMIT 32768U
 
-static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+static int
+iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
+				  enum iscsit_transport_type network_transport)
 {
 	char *payload = NULL;
 	struct iscsi_conn *conn = cmd->conn;
@@ -3467,6 +3469,9 @@
 				struct iscsi_np *np = tpg_np->tpg_np;
 				bool inaddr_any = iscsit_check_inaddr_any(np);
 
+				if (np->np_network_transport != network_transport)
+					continue;
+
 				if (!target_name_printed) {
 					len = sprintf(buf, "TargetName=%s",
 						      tiqn->tiqn);
@@ -3485,10 +3490,8 @@
 
 				len = sprintf(buf, "TargetAddress="
 					"%s:%hu,%hu",
-					(inaddr_any == false) ?
-						np->np_ip : conn->local_ip,
-					(inaddr_any == false) ?
-						np->np_port : conn->local_port,
+					inaddr_any ? conn->local_ip : np->np_ip,
+					inaddr_any ? conn->local_port : np->np_port,
 					tpg->tpgt);
 				len += 1;
 
@@ -3520,11 +3523,12 @@
 
 int
 iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
-		      struct iscsi_text_rsp *hdr)
+		      struct iscsi_text_rsp *hdr,
+		      enum iscsit_transport_type network_transport)
 {
 	int text_length, padding;
 
-	text_length = iscsit_build_sendtargets_response(cmd);
+	text_length = iscsit_build_sendtargets_response(cmd, network_transport);
 	if (text_length < 0)
 		return text_length;
 
@@ -3562,7 +3566,7 @@
 	u32 tx_size = 0;
 	int text_length, iov_count = 0, rc;
 
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP);
 	if (rc < 0)
 		return rc;
 
@@ -4234,8 +4238,6 @@
 	if (conn->conn_transport->iscsit_wait_conn)
 		conn->conn_transport->iscsit_wait_conn(conn);
 
-	iscsit_free_queue_reqs_for_conn(conn);
-
 	/*
 	 * During Connection recovery drop unacknowledged out of order
 	 * commands for this connection, and prepare the other commands
@@ -4252,6 +4254,7 @@
 		iscsit_clear_ooo_cmdsns_for_conn(conn);
 		iscsit_release_commands_from_conn(conn);
 	}
+	iscsit_free_queue_reqs_for_conn(conn);
 
 	/*
 	 * Handle decrementing session or connection usage count if
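
The simplified ternaries in the SendTargets path read: advertise the portal's own address unless it is a wildcard listener (inaddr_any), in which case fall back to the address and port the connection actually arrived on. A userspace sketch of the key that gets emitted (addresses made up):

#include <stdbool.h>
#include <stdio.h>

static int demo_target_address(char *buf, size_t len, bool inaddr_any,
			       const char *np_ip, unsigned short np_port,
			       const char *local_ip, unsigned short local_port,
			       unsigned short tpgt)
{
	return snprintf(buf, len, "TargetAddress=%s:%hu,%hu",
			inaddr_any ? local_ip : np_ip,
			inaddr_any ? local_port : np_port,
			tpgt);
}

int main(void)
{
	char buf[64];

	demo_target_address(buf, sizeof(buf), true,
			    "0.0.0.0", 3260, "192.168.1.10", 3260, 1);
	puts(buf);	/* TargetAddress=192.168.1.10:3260,1 */
	return 0;
}
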
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index de77d9a..19b842c 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -71,6 +71,40 @@
 			challenge_asciihex);
 }
 
+static int chap_check_algorithm(const char *a_str)
+{
+	char *tmp, *orig, *token;
+
+	tmp = kstrdup(a_str, GFP_KERNEL);
+	if (!tmp) {
+		pr_err("Memory allocation failed for CHAP_A temporary buffer\n");
+		return CHAP_DIGEST_UNKNOWN;
+	}
+	orig = tmp;
+
+	token = strsep(&tmp, "=");
+	if (!token)
+		goto out;
+
+	if (strcmp(token, "CHAP_A")) {
+		pr_err("Unable to locate CHAP_A key\n");
+		goto out;
+	}
+	while (token) {
+		token = strsep(&tmp, ",");
+		if (!token)
+			goto out;
+
+		if (!strncmp(token, "5", 1)) {
+			pr_debug("Selected MD5 Algorithm\n");
+			kfree(orig);
+			return CHAP_DIGEST_MD5;
+		}
+	}
+out:
+	kfree(orig);
+	return CHAP_DIGEST_UNKNOWN;
+}
 
 static struct iscsi_chap *chap_server_open(
 	struct iscsi_conn *conn,
@@ -79,6 +113,7 @@
 	char *aic_str,
 	unsigned int *aic_len)
 {
+	int ret;
 	struct iscsi_chap *chap;
 
 	if (!(auth->naf_flags & NAF_USERID_SET) ||
@@ -93,21 +128,24 @@
 		return NULL;
 
 	chap = conn->auth_protocol;
-	/*
-	 * We only support MD5 MDA presently.
-	 */
-	if (strncmp(a_str, "CHAP_A=5", 8)) {
-		pr_err("CHAP_A is not MD5.\n");
+	ret = chap_check_algorithm(a_str);
+	switch (ret) {
+	case CHAP_DIGEST_MD5:
+		pr_debug("[server] Got CHAP_A=5\n");
+		/*
+		 * Send back CHAP_A set to MD5.
+		*/
+		*aic_len = sprintf(aic_str, "CHAP_A=5");
+		*aic_len += 1;
+		chap->digest_type = CHAP_DIGEST_MD5;
+		pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+		break;
+	case CHAP_DIGEST_UNKNOWN:
+	default:
+		pr_err("Unsupported CHAP_A value\n");
 		return NULL;
 	}
-	pr_debug("[server] Got CHAP_A=5\n");
-	/*
-	 * Send back CHAP_A set to MD5.
-	 */
-	*aic_len = sprintf(aic_str, "CHAP_A=5");
-	*aic_len += 1;
-	chap->digest_type = CHAP_DIGEST_MD5;
-	pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+
 	/*
 	 * Set Identifier.
 	 */
@@ -314,6 +352,16 @@
 		goto out;
 	}
 	/*
+	 * During mutual authentication, the CHAP_C generated by the
+	 * initiator must not match the original CHAP_C generated by
+	 * the target.
+	 */
+	if (!memcmp(challenge_binhex, chap->challenge, CHAP_CHALLENGE_LENGTH)) {
+		pr_err("initiator CHAP_C matches target CHAP_C, failing"
+		       " login attempt\n");
+		goto out;
+	}
+	/*
 	 * Generate CHAP_N and CHAP_R for mutual authentication.
 	 */
 	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
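
chap_check_algorithm() treats the CHAP_A value as a comma-separated list of proposed digests and accepts the first supported one (MD5 == 5). A userspace sketch of that parse; note it uses strcmp for an exact token match where the kernel hunk uses strncmp(token, "5", 1):

#define _GNU_SOURCE	/* strsep(), strdup() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_DIGEST_UNKNOWN	0
#define DEMO_DIGEST_MD5		5

static int demo_check_algorithm(const char *a_str)
{
	char *tmp, *orig, *token;
	int ret = DEMO_DIGEST_UNKNOWN;

	tmp = orig = strdup(a_str);
	if (!tmp)
		return ret;

	token = strsep(&tmp, "=");
	if (token && !strcmp(token, "CHAP_A")) {
		while ((token = strsep(&tmp, ",")) != NULL) {
			if (!strcmp(token, "5")) {	/* MD5 proposed */
				ret = DEMO_DIGEST_MD5;
				break;
			}
		}
	}
	free(orig);
	return ret;
}

int main(void)
{
	printf("%d\n", demo_check_algorithm("CHAP_A=6,5"));	/* 5 */
	printf("%d\n", demo_check_algorithm("CHAP_A=7"));	/* 0 */
	return 0;
}
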
diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h
index 2f463c0..d22f7b96 100644
--- a/drivers/target/iscsi/iscsi_target_auth.h
+++ b/drivers/target/iscsi/iscsi_target_auth.h
@@ -1,6 +1,7 @@
 #ifndef _ISCSI_CHAP_H_
 #define _ISCSI_CHAP_H_
 
+#define CHAP_DIGEST_UNKNOWN	0
 #define CHAP_DIGEST_MD5		5
 #define CHAP_DIGEST_SHA		6
 
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index d9b1d88..fecb695 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1145,7 +1145,7 @@
 void iscsi_target_login_sess_out(struct iscsi_conn *conn,
 		struct iscsi_np *np, bool zero_tsih, bool new_sess)
 {
-	if (new_sess == false)
+	if (!new_sess)
 		goto old_sess_out;
 
 	pr_err("iSCSI Login negotiation failed.\n");
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 75b6859..62a095f 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -404,7 +404,7 @@
 	}
 
 	rc = schedule_delayed_work(&conn->login_work, 0);
-	if (rc == false) {
+	if (!rc) {
 		pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work"
 			 " got false\n");
 	}
@@ -513,7 +513,7 @@
 	state = (tpg->tpg_state == TPG_STATE_ACTIVE);
 	spin_unlock(&tpg->tpg_state_lock);
 
-	if (state == false) {
+	if (!state) {
 		pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n");
 		iscsi_target_restore_sock_callbacks(conn);
 		iscsi_target_login_drop(conn, login);
@@ -528,7 +528,7 @@
 		state = iscsi_target_sk_state_check(sk);
 		read_unlock_bh(&sk->sk_callback_lock);
 
-		if (state == false) {
+		if (!state) {
 			pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
 			iscsi_target_restore_sock_callbacks(conn);
 			iscsi_target_login_drop(conn, login);
@@ -773,6 +773,12 @@
 		}
 
 		goto do_auth;
+	} else if (!payload_length) {
+		pr_err("Initiator sent zero length security payload,"
+		       " login failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				    ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
 	}
 
 	if (login->first_request)
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 4d2e23f..02f9de2 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -474,10 +474,10 @@
 		if (!strcmp(param->name, AUTHMETHOD)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, HEADERDIGEST)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, DATADIGEST)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXCONNECTIONS)) {
 			SET_PSTATE_NEGOTIATE(param);
@@ -497,7 +497,7 @@
 		} else if (!strcmp(param->name, IMMEDIATEDATA)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
-			if (iser == false)
+			if (!iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, MAXXMITDATASEGMENTLENGTH)) {
 			continue;
@@ -528,13 +528,13 @@
 		} else if (!strcmp(param->name, OFMARKINT)) {
 			SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, RDMAEXTENSIONS)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, INITIATORRECVDATASEGMENTLENGTH)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		} else if (!strcmp(param->name, TARGETRECVDATASEGMENTLENGTH)) {
-			if (iser == true)
+			if (iser)
 				SET_PSTATE_NEGOTIATE(param);
 		}
 	}
@@ -1605,7 +1605,7 @@
 
 	tmpbuf = kzalloc(length + 1, GFP_KERNEL);
 	if (!tmpbuf) {
-		pr_err("Unable to allocate memory for tmpbuf.\n");
+		pr_err("Unable to allocate %u + 1 bytes for tmpbuf.\n", length);
 		return -1;
 	}
 
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 1431e84..c3cb5c1 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -189,7 +189,7 @@
 	iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown);
 }
 
-void iscsit_clear_tpg_np_login_threads(
+static void iscsit_clear_tpg_np_login_threads(
 	struct iscsi_portal_group *tpg,
 	bool shutdown)
 {
@@ -276,8 +276,6 @@
 	tpg->tpg_state = TPG_STATE_INACTIVE;
 	spin_unlock(&tpg->tpg_state_lock);
 
-	iscsit_clear_tpg_np_login_threads(tpg, true);
-
 	if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
 		pr_err("Unable to delete iSCSI Target Portal Group:"
 			" %hu while active sessions exist, and force=0\n",
@@ -453,7 +451,7 @@
 
 			match = iscsit_check_np_match(sockaddr, np,
 						network_transport);
-			if (match == true)
+			if (match)
 				break;
 		}
 		spin_unlock(&tpg->tpg_np_lock);
@@ -475,7 +473,7 @@
 
 	if (!tpg_np_parent) {
 		if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr,
-				network_transport) == true) {
+				network_transport)) {
 			pr_err("Network Portal: %s already exists on a"
 				" different TPG on %s\n", ip_str,
 				tpg->tpg_tiqn->tiqn);
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
index 0a182f2..e726533 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -8,7 +8,6 @@
 			struct iscsi_np *, struct iscsi_tpg_np **);
 extern int iscsit_get_tpg(struct iscsi_portal_group *);
 extern void iscsit_put_tpg(struct iscsi_portal_group *);
-extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool);
 extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
 extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
 extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 73ab75d..6d2f375 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -179,7 +179,7 @@
 	struct tcm_loop_hba *tl_hba;
 	struct tcm_loop_tpg *tl_tpg;
 	struct scatterlist *sgl_bidi = NULL;
-	u32 sgl_bidi_count = 0;
+	u32 sgl_bidi_count = 0, transfer_length;
 	int rc;
 
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
@@ -213,12 +213,21 @@
 
 	}
 
-	if (!scsi_prot_sg_count(sc) && scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
+	transfer_length = scsi_transfer_length(sc);
+	if (!scsi_prot_sg_count(sc) &&
+	    scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) {
 		se_cmd->prot_pto = true;
+		/*
+		 * loopback transport doesn't support
+		 * WRITE_GENERATE, READ_STRIP protection
+		 * information operations, go ahead unprotected.
+		 */
+		transfer_length = scsi_bufflen(sc);
+	}
 
 	rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
 			&tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
-			scsi_bufflen(sc), tcm_loop_sam_attr(sc),
+			transfer_length, tcm_loop_sam_attr(sc),
 			sc->sc_data_direction, 0,
 			scsi_sglist(sc), scsi_sg_count(sc),
 			sgl_bidi, sgl_bidi_count,
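
The transfer_length switch above matters because, with protection information on the wire, the fabric's expected transfer covers data plus PI bytes; when loopback cannot generate or strip PI, it submits the plain data length instead. A sketch of what scsi_transfer_length() accounts for, assuming the usual 8-byte tuple per logical block:

#include <stdint.h>
#include <stdio.h>

/* bytes expected on the wire for a command moving data_bytes of data */
static uint32_t demo_transfer_length(uint32_t data_bytes,
				     uint32_t block_size, int pi_on_wire)
{
	if (!pi_on_wire)
		return data_bytes;	/* go ahead unprotected */
	return data_bytes + (data_bytes / block_size) * 8;
}

int main(void)
{
	/* 4 KiB in 512-byte blocks: 4096 data + 8 * 8 PI = 4160 */
	printf("%u\n", demo_transfer_length(4096, 512, 1));
	printf("%u\n", demo_transfer_length(4096, 512, 0));
	return 0;
}
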
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index e022959..bd78d92 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -81,7 +81,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8);
 	return 0;
 }
 
@@ -137,7 +137,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 32);
 	return 0;
 }
 
@@ -176,24 +176,6 @@
 	return cmd->se_dev->dev_attrib.block_size * sectors;
 }
 
-static int sbc_check_valid_sectors(struct se_cmd *cmd)
-{
-	struct se_device *dev = cmd->se_dev;
-	unsigned long long end_lba;
-	u32 sectors;
-
-	sectors = cmd->data_length / dev->dev_attrib.block_size;
-	end_lba = dev->transport->get_blocks(dev) + 1;
-
-	if (cmd->t_task_lba + sectors > end_lba) {
-		pr_err("target: lba %llu, sectors %u exceeds end lba %llu\n",
-			cmd->t_task_lba, sectors, end_lba);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static inline u32 transport_get_sectors_6(unsigned char *cdb)
 {
 	/*
@@ -665,8 +647,19 @@
 
 	cmd->prot_type = dev->dev_attrib.pi_prot_type;
 	cmd->prot_length = dev->prot_length * sectors;
-	pr_debug("%s: prot_type=%d, prot_length=%d prot_op=%d prot_checks=%d\n",
-		 __func__, cmd->prot_type, cmd->prot_length,
+
+	/*
+	 * In case protection information exists over the wire,
+	 * we modify the command data length to describe pure data.
+	 * The actual transfer length is the data length plus the
+	 * protection length.
+	 */
+	if (protect)
+		cmd->data_length = sectors * dev->dev_attrib.block_size;
+
+	pr_debug("%s: prot_type=%d, data_length=%d, prot_length=%d "
+		 "prot_op=%d prot_checks=%d\n",
+		 __func__, cmd->prot_type, cmd->data_length, cmd->prot_length,
 		 cmd->prot_op, cmd->prot_checks);
 
 	return true;
@@ -877,15 +870,6 @@
 		break;
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
-		if (!ops->execute_sync_cache) {
-			size = 0;
-			cmd->execute_cmd = sbc_emulate_noop;
-			break;
-		}
-
-		/*
-		 * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE
-		 */
 		if (cdb[0] == SYNCHRONIZE_CACHE) {
 			sectors = transport_get_sectors_10(cdb);
 			cmd->t_task_lba = transport_lba_32(cdb);
@@ -893,18 +877,12 @@
 			sectors = transport_get_sectors_16(cdb);
 			cmd->t_task_lba = transport_lba_64(cdb);
 		}
-
-		size = sbc_get_size(cmd, sectors);
-
-		/*
-		 * Check to ensure that LBA + Range does not exceed past end of
-		 * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
-		 */
-		if (cmd->t_task_lba || sectors) {
-			if (sbc_check_valid_sectors(cmd) < 0)
-				return TCM_ADDRESS_OUT_OF_RANGE;
+		if (ops->execute_sync_cache) {
+			cmd->execute_cmd = ops->execute_sync_cache;
+			goto check_lba;
 		}
-		cmd->execute_cmd = ops->execute_sync_cache;
+		size = 0;
+		cmd->execute_cmd = sbc_emulate_noop;
 		break;
 	case UNMAP:
 		if (!ops->execute_unmap)
@@ -947,8 +925,10 @@
 		break;
 	case VERIFY:
 		size = 0;
+		sectors = transport_get_sectors_10(cdb);
+		cmd->t_task_lba = transport_lba_32(cdb);
 		cmd->execute_cmd = sbc_emulate_noop;
-		break;
+		goto check_lba;
 	case REZERO_UNIT:
 	case SEEK_6:
 	case SEEK_10:
@@ -988,7 +968,7 @@
 				dev->dev_attrib.hw_max_sectors);
 			return TCM_INVALID_CDB_FIELD;
 		}
-
+check_lba:
 		end_lba = dev->transport->get_blocks(dev) + 1;
 		if (cmd->t_task_lba + sectors > end_lba) {
 			pr_err("cmd exceeds last lba %llu "
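
VERIFY and SYNCHRONIZE_CACHE now funnel into the shared check_lba bounds test instead of the removed sbc_check_valid_sectors(). The test itself is simple: get_blocks() is the last zero-based LBA, so lba + sectors may equal, but not exceed, get_blocks() + 1. A sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool demo_lba_in_range(uint64_t last_lba, uint64_t lba, uint32_t sectors)
{
	uint64_t end_lba = last_lba + 1;	/* get_blocks() is zero-based */

	return lba + sectors <= end_lba;
}

int main(void)
{
	/* a 2048-block device: LBAs 0..2047 are valid */
	printf("%d\n", demo_lba_in_range(2047, 2040, 8));	/* 1: fits */
	printf("%d\n", demo_lba_in_range(2047, 2041, 8));	/* 0: past the end */
	return 0;
}
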
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 8653666..6cd7222 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -129,15 +129,10 @@
 spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf)
 {
 	struct se_device *dev = cmd->se_dev;
-	u16 len = 0;
+	u16 len;
 
 	if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) {
-		u32 unit_serial_len;
-
-		unit_serial_len = strlen(dev->t10_wwn.unit_serial);
-		unit_serial_len++; /* For NULL Terminator */
-
-		len += sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial);
+		len = sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial);
 		len++; /* Extra Byte for NULL Terminator */
 		buf[3] = len;
 	}
@@ -721,6 +716,7 @@
 	unsigned char *buf;
 	sense_reason_t ret;
 	int p;
+	int len = 0;
 
 	buf = kzalloc(SE_INQUIRY_BUF, GFP_KERNEL);
 	if (!buf) {
@@ -742,6 +738,7 @@
 		}
 
 		ret = spc_emulate_inquiry_std(cmd, buf);
+		len = buf[4] + 5;
 		goto out;
 	}
 
@@ -749,6 +746,7 @@
 		if (cdb[2] == evpd_handlers[p].page) {
 			buf[1] = cdb[2];
 			ret = evpd_handlers[p].emulate(cmd, buf);
+			len = get_unaligned_be16(&buf[2]) + 4;
 			goto out;
 		}
 	}
@@ -765,7 +763,7 @@
 	kfree(buf);
 
 	if (!ret)
-		target_complete_cmd(cmd, GOOD);
+		target_complete_cmd_with_length(cmd, GOOD, len);
 	return ret;
 }
 
@@ -1103,7 +1101,7 @@
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, length);
 	return 0;
 }
 
@@ -1279,7 +1277,7 @@
 	buf[3] = (lun_count & 0xff);
 	transport_kunmap_data_sg(cmd);
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8 + lun_count * 8);
 	return 0;
 }
 EXPORT_SYMBOL(spc_emulate_report_luns);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 2179fee..7fa62fc 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -504,7 +504,7 @@
 	 * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group
 	 * removal context.
 	 */
-	if (se_nacl && comp_nacl == true)
+	if (se_nacl && comp_nacl)
 		target_put_nacl(se_nacl);
 
 	transport_free_session(se_sess);
@@ -562,7 +562,7 @@
 
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return 1;
 	}
 
@@ -687,7 +687,7 @@
 	if (cmd->transport_state & CMD_T_ABORTED &&
 	    cmd->transport_state & CMD_T_STOP) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	} else if (!success) {
 		INIT_WORK(&cmd->work, target_complete_failure_work);
@@ -703,6 +703,23 @@
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+	if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) {
+		if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+			cmd->residual_count += cmd->data_length - length;
+		} else {
+			cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT;
+			cmd->residual_count = cmd->data_length - length;
+		}
+
+		cmd->data_length = length;
+	}
+
+	target_complete_cmd(cmd, scsi_status);
+}
+EXPORT_SYMBOL(target_complete_cmd_with_length);
+
 static void target_add_to_state_list(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
@@ -1761,7 +1778,7 @@
 			cmd->se_tfo->get_task_tag(cmd));
 
 		spin_unlock_irq(&cmd->t_state_lock);
-		complete(&cmd->t_transport_stop_comp);
+		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	}
 
@@ -2363,7 +2380,7 @@
 	 * fabric acknowledgement that requires two target_put_sess_cmd()
 	 * invocations before se_cmd descriptor release.
 	 */
-	if (ack_kref == true) {
+	if (ack_kref) {
 		kref_get(&se_cmd->cmd_kref);
 		se_cmd->se_cmd_flags |= SCF_ACK_KREF;
 	}
@@ -2407,6 +2424,10 @@
  */
 int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd)
 {
+	if (!se_sess) {
+		se_cmd->se_tfo->release_cmd(se_cmd);
+		return 1;
+	}
 	return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref,
 			&se_sess->sess_cmd_lock);
 }
@@ -2934,6 +2955,12 @@
 int transport_generic_handle_tmr(
 	struct se_cmd *cmd)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	cmd->transport_state |= CMD_T_ACTIVE;
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+
 	INIT_WORK(&cmd->work, target_tmr_work);
 	queue_work(cmd->se_dev->tmr_wq, &cmd->work);
 	return 0;
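
target_complete_cmd_with_length() lets emulated commands report the bytes they actually produced; anything short of the initiator's allocation length becomes (or extends) an underflow residual and the command shrinks to the real payload. A userspace sketch of that accounting (the struct fields are stand-ins for the se_cmd ones):

#include <stdint.h>
#include <stdio.h>

struct demo_cmd {
	uint32_t data_length;	/* initiator allocation length */
	uint32_t residual_count;
	int underflow;		/* SCF_UNDERFLOW_BIT stand-in  */
};

static void demo_complete_with_length(struct demo_cmd *cmd, uint32_t length)
{
	if (length < cmd->data_length) {
		if (cmd->underflow) {
			cmd->residual_count += cmd->data_length - length;
		} else {
			cmd->underflow = 1;
			cmd->residual_count = cmd->data_length - length;
		}
		cmd->data_length = length;
	}
}

int main(void)
{
	/* an INQUIRY with a 255-byte allocation but a 36-byte response */
	struct demo_cmd cmd = { .data_length = 255 };

	demo_complete_with_length(&cmd, 36);
	printf("data_length=%u residual=%u\n",
	       cmd.data_length, cmd.residual_count);	/* 36, 219 */
	return 0;
}
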
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 669c536..e9186cd 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -70,7 +70,7 @@
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
 	int rc;
 
-	if (src == true)
+	if (src)
 		dev_wwn = &xop->dst_tid_wwn[0];
 	else
 		dev_wwn = &xop->src_tid_wwn[0];
@@ -88,7 +88,7 @@
 		if (rc != 0)
 			continue;
 
-		if (src == true) {
+		if (src) {
 			xop->dst_dev = se_dev;
 			pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located"
 				" se_dev\n", xop->dst_dev);
@@ -166,7 +166,7 @@
 		return -EINVAL;
 	}
 
-	if (src == true) {
+	if (src) {
 		memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
 		/*
 		 * Determine if the source designator matches the local device
@@ -236,7 +236,7 @@
 			/*
 			 * Assume target descriptors are in source -> destination order..
 			 */
-			if (src == true)
+			if (src)
 				src = false;
 			else
 				src = true;
@@ -560,7 +560,7 @@
 	 * reservations.  The pt_cmd->se_lun pointer will be setup from within
 	 * target_xcopy_setup_pt_port()
 	 */
-	if (remote_port == false) {
+	if (!remote_port) {
 		pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
 		return 0;
 	}
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index f5fd515..be0c0d0 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -128,6 +128,7 @@
 	struct fc_lport *lport;
 	struct fc_exch *ep;
 	size_t len;
+	int rc;
 
 	if (cmd->aborted)
 		return 0;
@@ -137,9 +138,10 @@
 	len = sizeof(*fcp) + se_cmd->scsi_sense_length;
 	fp = fc_frame_alloc(lport, len);
 	if (!fp) {
-		/* XXX shouldn't just drop it - requeue and retry? */
-		return 0;
+		se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+		return -ENOMEM;
 	}
+
 	fcp = fc_frame_payload_get(fp, len);
 	memset(fcp, 0, len);
 	fcp->resp.fr_status = se_cmd->scsi_status;
@@ -170,7 +172,18 @@
 	fc_fill_fc_hdr(fp, FC_RCTL_DD_CMD_STATUS, ep->did, ep->sid, FC_TYPE_FCP,
 		       FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ, 0);
 
-	lport->tt.seq_send(lport, cmd->seq, fp);
+	rc = lport->tt.seq_send(lport, cmd->seq, fp);
+	if (rc) {
+		pr_info_ratelimited("%s: Failed to send response frame %p, "
+				    "xid <0x%x>\n", __func__, fp, ep->xid);
+		/*
+		 * Generate a TASK_SET_FULL status to notify the initiator
+		 * to reduce its queue_depth after the se_cmd response has
+		 * been re-queued by target-core.
+		 */
+		se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+		return -ENOMEM;
+	}
 	lport->tt.exch_done(cmd->seq);
 	return 0;
 }
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index e415af3..97b486c 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -82,6 +82,10 @@
 
 	if (cmd->aborted)
 		return 0;
+
+	if (se_cmd->scsi_status == SAM_STAT_TASK_SET_FULL)
+		goto queue_status;
+
 	ep = fc_seq_exch(cmd->seq);
 	lport = ep->lp;
 	cmd->seq = lport->tt.seq_start_next(cmd->seq);
@@ -178,14 +182,23 @@
 			       FC_TYPE_FCP, f_ctl, fh_off);
 		error = lport->tt.seq_send(lport, seq, fp);
 		if (error) {
-			/* XXX For now, initiator will retry */
-			pr_err_ratelimited("%s: Failed to send frame %p, "
+			pr_info_ratelimited("%s: Failed to send frame %p, "
 						"xid <0x%x>, remaining %zu, "
 						"lso_max <0x%x>\n",
 						__func__, fp, ep->xid,
 						remaining, lport->lso_max);
+			/*
+			 * Go ahead and set TASK_SET_FULL status ignoring the
+			 * rest of the DataIN, and immediately attempt to
+			 * send the response via ft_queue_status() in order
+			 * to notify the initiator that it should reduce its
+			 * per LUN queue_depth.
+			 */
+			se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
+			break;
 		}
 	}
+queue_status:
 	return ft_queue_status(se_cmd);
 }
 
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index e9c280f..4f4ffa4 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -57,7 +57,8 @@
 #define TCM_VHOST_MAX_CDB_SIZE 32
 #define TCM_VHOST_DEFAULT_TAGS 256
 #define TCM_VHOST_PREALLOC_SGLS 2048
-#define TCM_VHOST_PREALLOC_PAGES 2048
+#define TCM_VHOST_PREALLOC_UPAGES 2048
+#define TCM_VHOST_PREALLOC_PROT_SGLS 512
 
 struct vhost_scsi_inflight {
 	/* Wait for the flush operation to finish */
@@ -79,10 +80,12 @@
 	u64 tvc_tag;
 	/* The number of scatterlists associated with this cmd */
 	u32 tvc_sgl_count;
+	u32 tvc_prot_sgl_count;
 	/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
 	u32 tvc_lun;
 	/* Pointer to the SGL formatted memory from virtio-scsi */
 	struct scatterlist *tvc_sgl;
+	struct scatterlist *tvc_prot_sgl;
 	struct page **tvc_upages;
 	/* Pointer to response */
 	struct virtio_scsi_cmd_resp __user *tvc_resp;
@@ -166,7 +169,8 @@
 };
 
 enum {
-	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG)
+	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
+					       (1ULL << VIRTIO_SCSI_F_T10_PI)
 };
 
 #define VHOST_SCSI_MAX_TARGET	256
@@ -456,12 +460,16 @@
 	struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
 				struct tcm_vhost_cmd, tvc_se_cmd);
 	struct se_session *se_sess = se_cmd->se_sess;
+	int i;
 
 	if (tv_cmd->tvc_sgl_count) {
-		u32 i;
 		for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
 			put_page(sg_page(&tv_cmd->tvc_sgl[i]));
 	}
+	if (tv_cmd->tvc_prot_sgl_count) {
+		for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
+			put_page(sg_page(&tv_cmd->tvc_prot_sgl[i]));
+	}
 
 	tcm_vhost_put_inflight(tv_cmd->inflight);
 	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
@@ -713,16 +721,14 @@
 }
 
 static struct tcm_vhost_cmd *
-vhost_scsi_get_tag(struct vhost_virtqueue *vq,
-			struct tcm_vhost_tpg *tpg,
-			struct virtio_scsi_cmd_req *v_req,
-			u32 exp_data_len,
-			int data_direction)
+vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg,
+		   unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr,
+		   u32 exp_data_len, int data_direction)
 {
 	struct tcm_vhost_cmd *cmd;
 	struct tcm_vhost_nexus *tv_nexus;
 	struct se_session *se_sess;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *prot_sg;
 	struct page **pages;
 	int tag;
 
@@ -741,19 +747,24 @@
 
 	cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
 	sg = cmd->tvc_sgl;
+	prot_sg = cmd->tvc_prot_sgl;
 	pages = cmd->tvc_upages;
 	memset(cmd, 0, sizeof(struct tcm_vhost_cmd));
 
 	cmd->tvc_sgl = sg;
+	cmd->tvc_prot_sgl = prot_sg;
 	cmd->tvc_upages = pages;
 	cmd->tvc_se_cmd.map_tag = tag;
-	cmd->tvc_tag = v_req->tag;
-	cmd->tvc_task_attr = v_req->task_attr;
+	cmd->tvc_tag = scsi_tag;
+	cmd->tvc_lun = lun;
+	cmd->tvc_task_attr = task_attr;
 	cmd->tvc_exp_data_len = exp_data_len;
 	cmd->tvc_data_direction = data_direction;
 	cmd->tvc_nexus = tv_nexus;
 	cmd->inflight = tcm_vhost_get_inflight(vq);
 
+	memcpy(cmd->tvc_cdb, cdb, TCM_VHOST_MAX_CDB_SIZE);
+
 	return cmd;
 }
 
@@ -767,34 +778,27 @@
 		      struct scatterlist *sgl,
 		      unsigned int sgl_count,
 		      struct iovec *iov,
-		      int write)
+		      struct page **pages,
+		      bool write)
 {
 	unsigned int npages = 0, pages_nr, offset, nbytes;
 	struct scatterlist *sg = sgl;
 	void __user *ptr = iov->iov_base;
 	size_t len = iov->iov_len;
-	struct page **pages;
 	int ret, i;
 
-	if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
-		pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than"
-		       " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
-			sgl_count, TCM_VHOST_PREALLOC_SGLS);
-		return -ENOBUFS;
-	}
-
 	pages_nr = iov_num_pages(iov);
-	if (pages_nr > sgl_count)
-		return -ENOBUFS;
-
-	if (pages_nr > TCM_VHOST_PREALLOC_PAGES) {
+	if (pages_nr > sgl_count) {
 		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
-		       " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n",
-			pages_nr, TCM_VHOST_PREALLOC_PAGES);
+		       " sgl_count: %u\n", pages_nr, sgl_count);
 		return -ENOBUFS;
 	}
-
-	pages = tv_cmd->tvc_upages;
+	if (pages_nr > TCM_VHOST_PREALLOC_UPAGES) {
+		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
+		       " preallocated TCM_VHOST_PREALLOC_UPAGES: %u\n",
+			pages_nr, TCM_VHOST_PREALLOC_UPAGES);
+		return -ENOBUFS;
+	}
 
 	ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
 	/* No pages were pinned */
@@ -825,33 +829,32 @@
 static int
 vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
 			  struct iovec *iov,
-			  unsigned int niov,
-			  int write)
+			  int niov,
+			  bool write)
 {
-	int ret;
-	unsigned int i;
-	u32 sgl_count;
-	struct scatterlist *sg;
+	struct scatterlist *sg = cmd->tvc_sgl;
+	unsigned int sgl_count = 0;
+	int ret, i;
 
-	/*
-	 * Find out how long sglist needs to be
-	 */
-	sgl_count = 0;
 	for (i = 0; i < niov; i++)
 		sgl_count += iov_num_pages(&iov[i]);
 
-	/* TODO overflow checking */
+	if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
+		pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than"
+			" preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
+			sgl_count, TCM_VHOST_PREALLOC_SGLS);
+		return -ENOBUFS;
+	}
 
-	sg = cmd->tvc_sgl;
 	pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count);
 	sg_init_table(sg, sgl_count);
-
 	cmd->tvc_sgl_count = sgl_count;
 
-	pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count);
+	pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count);
+
 	for (i = 0; i < niov; i++) {
 		ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
-					    write);
+					    cmd->tvc_upages, write);
 		if (ret < 0) {
 			for (i = 0; i < cmd->tvc_sgl_count; i++)
 				put_page(sg_page(&cmd->tvc_sgl[i]));
@@ -859,31 +862,70 @@
 			cmd->tvc_sgl_count = 0;
 			return ret;
 		}
-
 		sg += ret;
 		sgl_count -= ret;
 	}
 	return 0;
 }
 
+static int
+vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
+			   struct iovec *iov,
+			   int niov,
+			   bool write)
+{
+	struct scatterlist *prot_sg = cmd->tvc_prot_sgl;
+	unsigned int prot_sgl_count = 0;
+	int ret, i;
+
+	for (i = 0; i < niov; i++)
+		prot_sgl_count += iov_num_pages(&iov[i]);
+
+	if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) {
+		pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than"
+			" preallocated TCM_VHOST_PREALLOC_PROT_SGLS: %u\n",
+			prot_sgl_count, TCM_VHOST_PREALLOC_PROT_SGLS);
+		return -ENOBUFS;
+	}
+
+	pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
+		 prot_sg, prot_sgl_count);
+	sg_init_table(prot_sg, prot_sgl_count);
+	cmd->tvc_prot_sgl_count = prot_sgl_count;
+
+	for (i = 0; i < niov; i++) {
+		ret = vhost_scsi_map_to_sgl(cmd, prot_sg, prot_sgl_count, &iov[i],
+					    cmd->tvc_upages, write);
+		if (ret < 0) {
+			for (i = 0; i < cmd->tvc_prot_sgl_count; i++)
+				put_page(sg_page(&cmd->tvc_prot_sgl[i]));
+
+			cmd->tvc_prot_sgl_count = 0;
+			return ret;
+		}
+		prot_sg += ret;
+		prot_sgl_count -= ret;
+	}
+	return 0;
+}
+
 static void tcm_vhost_submission_work(struct work_struct *work)
 {
 	struct tcm_vhost_cmd *cmd =
 		container_of(work, struct tcm_vhost_cmd, work);
 	struct tcm_vhost_nexus *tv_nexus;
 	struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
-	struct scatterlist *sg_ptr, *sg_bidi_ptr = NULL;
-	int rc, sg_no_bidi = 0;
+	struct scatterlist *sg_ptr, *sg_prot_ptr = NULL;
+	int rc;
 
+	/* FIXME: BIDI operation */
 	if (cmd->tvc_sgl_count) {
 		sg_ptr = cmd->tvc_sgl;
-/* FIXME: Fix BIDI operation in tcm_vhost_submission_work() */
-#if 0
-		if (se_cmd->se_cmd_flags & SCF_BIDI) {
-			sg_bidi_ptr = NULL;
-			sg_no_bidi = 0;
-		}
-#endif
+
+		if (cmd->tvc_prot_sgl_count)
+			sg_prot_ptr = cmd->tvc_prot_sgl;
+		else
+			se_cmd->prot_pto = true;
 	} else {
 		sg_ptr = NULL;
 	}
@@ -894,7 +936,7 @@
 			cmd->tvc_lun, cmd->tvc_exp_data_len,
 			cmd->tvc_task_attr, cmd->tvc_data_direction,
 			TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count,
-			sg_bidi_ptr, sg_no_bidi, NULL, 0);
+			NULL, 0, sg_prot_ptr, cmd->tvc_prot_sgl_count);
 	if (rc < 0) {
 		transport_send_check_condition_and_sense(se_cmd,
 				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
@@ -926,12 +968,18 @@
 {
 	struct tcm_vhost_tpg **vs_tpg;
 	struct virtio_scsi_cmd_req v_req;
+	struct virtio_scsi_cmd_req_pi v_req_pi;
 	struct tcm_vhost_tpg *tpg;
 	struct tcm_vhost_cmd *cmd;
-	u32 exp_data_len, data_first, data_num, data_direction;
+	u64 tag;
+	u32 exp_data_len, data_first, data_num, data_direction, prot_first;
 	unsigned out, in, i;
-	int head, ret;
-	u8 target;
+	int head, ret, data_niov, prot_niov, prot_bytes;
+	size_t req_size;
+	u16 lun;
+	u8 *target, *lunp, task_attr;
+	bool hdr_pi;
+	void *req, *cdb;
 
 	mutex_lock(&vq->mutex);
 	/*
@@ -962,7 +1010,7 @@
 			break;
 		}
 
-/* FIXME: BIDI operation */
+		/* FIXME: BIDI operation */
 		if (out == 1 && in == 1) {
 			data_direction = DMA_NONE;
 			data_first = 0;
@@ -992,29 +1040,38 @@
 			break;
 		}
 
-		if (unlikely(vq->iov[0].iov_len != sizeof(v_req))) {
-			vq_err(vq, "Expecting virtio_scsi_cmd_req, got %zu"
-				" bytes\n", vq->iov[0].iov_len);
+		if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) {
+			req = &v_req_pi;
+			lunp = &v_req_pi.lun[0];
+			target = &v_req_pi.lun[1];
+			req_size = sizeof(v_req_pi);
+			hdr_pi = true;
+		} else {
+			req = &v_req;
+			lunp = &v_req.lun[0];
+			target = &v_req.lun[1];
+			req_size = sizeof(v_req);
+			hdr_pi = false;
+		}
+
+		if (unlikely(vq->iov[0].iov_len < req_size)) {
+			pr_err("Expecting virtio-scsi header: %zu, got %zu\n",
+			       req_size, vq->iov[0].iov_len);
 			break;
 		}
-		pr_debug("Calling __copy_from_user: vq->iov[0].iov_base: %p,"
-			" len: %zu\n", vq->iov[0].iov_base, sizeof(v_req));
-		ret = __copy_from_user(&v_req, vq->iov[0].iov_base,
-				sizeof(v_req));
+		ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size);
 		if (unlikely(ret)) {
 			vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
 			break;
 		}
 
 		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
-		if (unlikely(v_req.lun[0] != 1)) {
+		if (unlikely(*lunp != 1)) {
 			vhost_scsi_send_bad_target(vs, vq, head, out);
 			continue;
 		}
 
-		/* Extract the tpgt */
-		target = v_req.lun[1];
-		tpg = ACCESS_ONCE(vs_tpg[target]);
+		tpg = ACCESS_ONCE(vs_tpg[*target]);
 
 		/* Target does not exist, fail the request */
 		if (unlikely(!tpg)) {
@@ -1022,17 +1079,79 @@
 			continue;
 		}
 
-		exp_data_len = 0;
-		for (i = 0; i < data_num; i++)
-			exp_data_len += vq->iov[data_first + i].iov_len;
+		data_niov = data_num;
+		prot_niov = prot_first = prot_bytes = 0;
+		/*
+		 * Determine if any protection information iovecs are preceding
+		 * the actual data payload, and adjust data_first + data_niov
+		 * values accordingly for vhost_scsi_map_iov_to_sgl() below.
+		 *
+		 * Also extract virtio_scsi header bits for vhost_scsi_get_tag()
+		 */
+		if (hdr_pi) {
+			if (v_req_pi.pi_bytesout) {
+				if (data_direction != DMA_TO_DEVICE) {
+					vq_err(vq, "Received non zero do_pi_niov"
+						", but wrong data_direction\n");
+					goto err_cmd;
+				}
+				prot_bytes = v_req_pi.pi_bytesout;
+			} else if (v_req_pi.pi_bytesin) {
+				if (data_direction != DMA_FROM_DEVICE) {
+					vq_err(vq, "Received non zero di_pi_niov"
+						", but wrong data_direction\n");
+					goto err_cmd;
+				}
+				prot_bytes = v_req_pi.pi_bytesin;
+			}
+			if (prot_bytes) {
+				int tmp = 0;
 
-		cmd = vhost_scsi_get_tag(vq, tpg, &v_req,
-					 exp_data_len, data_direction);
+				for (i = 0; i < data_num; i++) {
+					tmp += vq->iov[data_first + i].iov_len;
+					prot_niov++;
+					if (tmp >= prot_bytes)
+						break;
+				}
+				prot_first = data_first;
+				data_first += prot_niov;
+				data_niov = data_num - prot_niov;
+			}
+			tag = v_req_pi.tag;
+			task_attr = v_req_pi.task_attr;
+			cdb = &v_req_pi.cdb[0];
+			lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF;
+		} else {
+			tag = v_req.tag;
+			task_attr = v_req.task_attr;
+			cdb = &v_req.cdb[0];
+			lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
+		}
+		exp_data_len = 0;
+		for (i = 0; i < data_niov; i++)
+			exp_data_len += vq->iov[data_first + i].iov_len;
+		/*
+		 * Check that the received CDB size does not exceed our
+		 * hardcoded max for vhost-scsi
+		 *
+		 * TODO what if cdb was too small for varlen cdb header?
+		 */
+		if (unlikely(scsi_command_size(cdb) > TCM_VHOST_MAX_CDB_SIZE)) {
+			vq_err(vq, "Received SCSI CDB with command_size: %d that"
+				" exceeds TCM_VHOST_MAX_CDB_SIZE: %d\n",
+				scsi_command_size(cdb), TCM_VHOST_MAX_CDB_SIZE);
+			goto err_cmd;
+		}
+
+		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
+					 exp_data_len + prot_bytes,
+					 data_direction);
 		if (IS_ERR(cmd)) {
 			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
 					PTR_ERR(cmd));
 			goto err_cmd;
 		}
+
 		pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction"
 			": %d\n", cmd, exp_data_len, data_direction);
 
@@ -1040,40 +1159,28 @@
 		cmd->tvc_vq = vq;
 		cmd->tvc_resp = vq->iov[out].iov_base;
 
-		/*
-		 * Copy in the recieved CDB descriptor into cmd->tvc_cdb
-		 * that will be used by tcm_vhost_new_cmd_map() and down into
-		 * target_setup_cmd_from_cdb()
-		 */
-		memcpy(cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE);
-		/*
-		 * Check that the recieved CDB size does not exceeded our
-		 * hardcoded max for tcm_vhost
-		 */
-		/* TODO what if cdb was too small for varlen cdb header? */
-		if (unlikely(scsi_command_size(cmd->tvc_cdb) >
-					TCM_VHOST_MAX_CDB_SIZE)) {
-			vq_err(vq, "Received SCSI CDB with command_size: %d that"
-				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
-				scsi_command_size(cmd->tvc_cdb),
-				TCM_VHOST_MAX_CDB_SIZE);
-			goto err_free;
-		}
-		cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
-
 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
 			cmd->tvc_cdb[0], cmd->tvc_lun);
 
+		if (prot_niov) {
+			ret = vhost_scsi_map_iov_to_prot(cmd,
+					&vq->iov[prot_first], prot_niov,
+					data_direction == DMA_FROM_DEVICE);
+			if (unlikely(ret)) {
+				vq_err(vq, "Failed to map iov to"
+					" prot_sgl\n");
+				goto err_free;
+			}
+		}
 		if (data_direction != DMA_NONE) {
 			ret = vhost_scsi_map_iov_to_sgl(cmd,
-					&vq->iov[data_first], data_num,
+					&vq->iov[data_first], data_niov,
 					data_direction == DMA_FROM_DEVICE);
 			if (unlikely(ret)) {
 				vq_err(vq, "Failed to map iov to sgl\n");
 				goto err_free;
 			}
 		}
-
 		/*
 		 * Save the descriptor from vhost_get_vq_desc() to be used to
 		 * complete the virtio-scsi request in TCM callback context via
@@ -1716,6 +1823,7 @@
 		tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
 
 		kfree(tv_cmd->tvc_sgl);
+		kfree(tv_cmd->tvc_prot_sgl);
 		kfree(tv_cmd->tvc_upages);
 	}
 }
@@ -1750,7 +1858,7 @@
 	tv_nexus->tvn_se_sess = transport_init_session_tags(
 					TCM_VHOST_DEFAULT_TAGS,
 					sizeof(struct tcm_vhost_cmd),
-					TARGET_PROT_NORMAL);
+					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
 	if (IS_ERR(tv_nexus->tvn_se_sess)) {
 		mutex_unlock(&tpg->tv_tpg_mutex);
 		kfree(tv_nexus);
@@ -1769,12 +1877,20 @@
 		}
 
 		tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
-					TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL);
+					TCM_VHOST_PREALLOC_UPAGES, GFP_KERNEL);
 		if (!tv_cmd->tvc_upages) {
 			mutex_unlock(&tpg->tv_tpg_mutex);
 			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
 			goto out;
 		}
+
+		tv_cmd->tvc_prot_sgl = kzalloc(sizeof(struct scatterlist) *
+					TCM_VHOST_PREALLOC_PROT_SGLS, GFP_KERNEL);
+		if (!tv_cmd->tvc_prot_sgl) {
+			mutex_unlock(&tpg->tv_tpg_mutex);
+			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
+			goto out;
+		}
 	}
 	/*
 	 * Since we are running in 'demo mode' this call will generate a
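
The LUN handling above follows virtio-scsi's flat (SAM single-level) addressing: byte 0 of the 8-byte field is 1, byte 1 is the target, and bytes 2-3 carry 0x4000 | lun big-endian, which is why the decode masks with 0x3FFF. A minimal user-space sketch of the round trip (hypothetical helper names; layout per the virtio-scsi spec):

    #include <stdint.h>
    #include <stdio.h>

    /* Encode target/lun the way a virtio-scsi driver fills the LUN field. */
    static void vs_encode_lun(uint8_t lun8[8], uint8_t target, uint16_t lun)
    {
            lun8[0] = 1;                          /* flat address method */
            lun8[1] = target;
            lun8[2] = 0x40 | ((lun >> 8) & 0x3f); /* 0x4000 | lun, big-endian */
            lun8[3] = lun & 0xff;
            lun8[4] = lun8[5] = lun8[6] = lun8[7] = 0;
    }

    /* Decode exactly as the vhost-scsi code above does. */
    static unsigned int vs_decode_lun(const uint8_t lun8[8])
    {
            return ((lun8[2] << 8) | lun8[3]) & 0x3FFF;
    }

    int main(void)
    {
            uint8_t buf[8];

            vs_encode_lun(buf, 0, 5);
            printf("decoded lun = %u\n", vs_decode_lun(buf)); /* prints 5 */
            return 0;
    }
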
diff --git a/firmware/Makefile b/firmware/Makefile
index cbb09ce..5747417 100644
--- a/firmware/Makefile
+++ b/firmware/Makefile
@@ -4,10 +4,10 @@
 
 # Create $(fwabs) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a
 # leading /, it's relative to $(srctree).
-fwdir := $(subst ",,$(CONFIG_EXTRA_FIRMWARE_DIR))
+fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR))
 fwabs := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir))
 
-fw-external-y := $(subst ",,$(CONFIG_EXTRA_FIRMWARE))
+fw-external-y := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE))
 
 # There are three cases to care about:
 # 1. Building kernel with CONFIG_FIRMWARE_IN_KERNEL=y -- $(fw-shipped-y) should
@@ -138,12 +138,6 @@
 
 fw-shipped-all := $(fw-shipped-y) $(fw-shipped-m) $(fw-shipped-)
 
-# Directories which we _might_ need to create, so we have a rule for them.
-firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all))))
-
-quiet_cmd_mkdir = MKDIR   $(patsubst $(objtree)/%,%,$@)
-      cmd_mkdir = mkdir -p $@
-
 quiet_cmd_ihex  = IHEX    $@
       cmd_ihex  = $(OBJCOPY) -Iihex -Obinary $< $@
 
@@ -184,21 +178,10 @@
 		include/config/superh32.h include/config/superh64.h \
 		include/config/x86_32.h include/config/x86_64.h)
 
-# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
-# It'll end up depending on these targets, so make them a PHONY rule which
-# depends on _all_ the directories in $(firmware-dirs), and it'll work out OK.
-PHONY += $(objtree)/$$(%) $(objtree)/$(obj)/$$(%)
-$(objtree)/$$(%) $(objtree)/$(obj)/$$(%): $(firmware-dirs)
-	@true
-
-# For the $$(dir %) trick, where we need % to be expanded first.
-.SECONDEXPANSION:
-
-$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps) \
-		| $(objtree)/$$(dir %)
+$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps)
 	$(call cmd,fwbin,$(patsubst %.gen.S,%,$@))
 $(patsubst %,$(obj)/%.gen.S, $(fw-external-y)): %: $(wordsize_deps) \
-		include/config/extra/firmware/dir.h | $(objtree)/$$(dir %)
+		include/config/extra/firmware/dir.h
 	$(call cmd,fwbin,$(fwabs)/$(patsubst $(obj)/%.gen.S,%,$@))
 
 # The .o files depend on the binaries directly; the .S files don't.
@@ -207,7 +190,7 @@
 
 # .ihex is used just as a simple way to hold binary files in a source tree
 # where binaries are frowned upon. They are directly converted with objcopy.
-$(obj)/%: $(obj)/%.ihex | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%: $(obj)/%.ihex
 	$(call cmd,ihex)
 
 # Don't depend on ihex2fw if we're installing and it already exists.
@@ -226,16 +209,13 @@
 # is actually meaningful, because the firmware has to be loaded in a certain
 # order rather than as a single binary blob. Thus, we convert them into our
 # more compact binary representation of ihex records (<linux/ihex.h>)
-$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep)
 	$(call cmd,ihex2fw)
 
 # .H16 is our own modified form of Intel HEX, with 16-bit length for records.
-$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %)
+$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep)
 	$(call cmd,h16tofw)
 
-$(firmware-dirs):
-	$(call cmd,mkdir)
-
 obj-y				 += $(patsubst %,%.gen.o, $(fw-external-y))
 obj-$(CONFIG_FIRMWARE_IN_KERNEL) += $(patsubst %,%.gen.o, $(fw-shipped-y))
 
diff --git a/fs/aio.c b/fs/aio.c
index 56b2860..4f078c0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -477,7 +477,7 @@
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -538,7 +538,7 @@
 				       struct kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
-		kiocb_cancel(ctx, req);
+		kiocb_cancel(req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -727,42 +727,42 @@
  *	when the processes owning a context have all exited to encourage
  *	the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 		struct completion *requests_done)
 {
-	if (!atomic_xchg(&ctx->dead, 1)) {
-		struct kioctx_table *table;
+	struct kioctx_table *table;
 
-		spin_lock(&mm->ioctx_lock);
-		rcu_read_lock();
-		table = rcu_dereference(mm->ioctx_table);
+	if (atomic_xchg(&ctx->dead, 1))
+		return -EINVAL;
 
-		WARN_ON(ctx != table->table[ctx->id]);
-		table->table[ctx->id] = NULL;
-		rcu_read_unlock();
-		spin_unlock(&mm->ioctx_lock);
 
-		/* percpu_ref_kill() will do the necessary call_rcu() */
-		wake_up_all(&ctx->wait);
+	spin_lock(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
 
-		/*
-		 * It'd be more correct to do this in free_ioctx(), after all
-		 * the outstanding kiocbs have finished - but by then io_destroy
-		 * has already returned, so io_setup() could potentially return
-		 * -EAGAIN with no ioctxs actually in use (as far as userspace
-		 *  could tell).
-		 */
-		aio_nr_sub(ctx->max_reqs);
+	WARN_ON(ctx != table->table[ctx->id]);
+	table->table[ctx->id] = NULL;
+	rcu_read_unlock();
+	spin_unlock(&mm->ioctx_lock);
 
-		if (ctx->mmap_size)
-			vm_munmap(ctx->mmap_base, ctx->mmap_size);
+	/* percpu_ref_kill() will do the necessary call_rcu() */
+	wake_up_all(&ctx->wait);
 
-		ctx->requests_done = requests_done;
-		percpu_ref_kill(&ctx->users);
-	} else {
-		if (requests_done)
-			complete(requests_done);
-	}
+	/*
+	 * It'd be more correct to do this in free_ioctx(), after all
+	 * the outstanding kiocbs have finished - but by then io_destroy
+	 * has already returned, so io_setup() could potentially return
+	 * -EAGAIN with no ioctxs actually in use (as far as userspace
+	 *  could tell).
+	 */
+	aio_nr_sub(ctx->max_reqs);
+
+	if (ctx->mmap_size)
+		vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+	ctx->requests_done = requests_done;
+	percpu_ref_kill(&ctx->users);
+	return 0;
 }
 
 /* wait_on_sync_kiocb:
@@ -1219,21 +1219,23 @@
 	if (likely(NULL != ioctx)) {
 		struct completion requests_done =
 			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		int ret;
 
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &requests_done);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
 		 * keep using user-space buffers even if user thinks the context
 		 * is destroyed.
 		 */
-		wait_for_completion(&requests_done);
+		if (!ret)
+			wait_for_completion(&requests_done);
 
-		return 0;
+		return ret;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
 	return -EINVAL;
@@ -1595,7 +1597,7 @@
 
 	kiocb = lookup_kiocb(ctx, iocb, key);
 	if (kiocb)
-		ret = kiocb_cancel(ctx, kiocb);
+		ret = kiocb_cancel(kiocb);
 	else
 		ret = -EINVAL;
 
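
The kill_ioctx() rework above turns the atomic_xchg() on ctx->dead into the function's return value, so io_destroy() waits on requests_done only when its own call actually performed the teardown. The same first-caller-claims idiom, sketched with C11 atomics (hypothetical names):

    #include <stdatomic.h>
    #include <stdio.h>

    struct ctx {
            atomic_int dead;
    };

    /* The first caller wins and returns 0 (it may tear the context down);
     * everyone else gets -1, mirroring kill_ioctx()'s -EINVAL. */
    static int kill_ctx(struct ctx *c)
    {
            if (atomic_exchange(&c->dead, 1))
                    return -1;
            /* single-threaded teardown would run here */
            return 0;
    }

    int main(void)
    {
            struct ctx c = { 0 };

            printf("first  kill: %d\n", kill_ctx(&c)); /* 0  */
            printf("second kill: %d\n", kill_ctx(&c)); /* -1 */
            return 0;
    }
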
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f25a909..a389820 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2354,7 +2354,7 @@
 {
 	int uptodate = (err == 0);
 	struct extent_io_tree *tree;
-	int ret;
+	int ret = 0;
 
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -5068,6 +5068,43 @@
 	}
 }
 
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+			unsigned long start,
+			unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char __user *dst = (char __user *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = page_address(page);
+		if (copy_to_user(dst, kaddr + offset, cur)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+
+	return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long min_len, char **map,
 			       unsigned long *map_start,
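
read_extent_buffer_to_user() above walks the buffer page by page, copying min(len, PAGE_CACHE_SIZE - offset) per round so only the first chunk can start mid-page; each failed copy_to_user() becomes -EFAULT. The chunking pattern reduced to a self-contained sketch (memcpy standing in for copy_to_user; hypothetical names):

    #include <stddef.h>
    #include <string.h>

    #define PAGE_SZ 4096

    /* Copy len bytes starting at byte `start` of a paged source into dst.
     * Only the first chunk can begin mid-page; afterwards offset is 0,
     * just as in read_extent_buffer_to_user() above. */
    static void copy_paged(char *dst, char *const pages[],
                           size_t start, size_t len)
    {
            size_t i = start / PAGE_SZ;
            size_t offset = start % PAGE_SZ;

            while (len > 0) {
                    size_t cur = len < PAGE_SZ - offset ?
                                 len : PAGE_SZ - offset;

                    memcpy(dst, pages[i] + offset, cur); /* copy_to_user() in the kernel */
                    dst += cur;
                    len -= cur;
                    offset = 0;
                    i++;
            }
    }
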
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8b63f2d..15ce5f2 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -304,6 +304,9 @@
 void read_extent_buffer(struct extent_buffer *eb, void *dst,
 			unsigned long start,
 			unsigned long len);
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+			       unsigned long start,
+			       unsigned long len);
 void write_extent_buffer(struct extent_buffer *eb, const void *src,
 			 unsigned long start, unsigned long len);
 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 82c18ba..0d321c2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1957,7 +1957,8 @@
 			       struct btrfs_path *path,
 			       struct btrfs_key *key,
 			       struct btrfs_ioctl_search_key *sk,
-			       char *buf,
+			       size_t *buf_size,
+			       char __user *ubuf,
 			       unsigned long *sk_offset,
 			       int *num_found)
 {
@@ -1989,13 +1990,25 @@
 		if (!key_in_sk(key, sk))
 			continue;
 
-		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
-			item_len = 0;
+		if (sizeof(sh) + item_len > *buf_size) {
+			if (*num_found) {
+				ret = 1;
+				goto out;
+			}
 
-		if (sizeof(sh) + item_len + *sk_offset >
-		    BTRFS_SEARCH_ARGS_BUFSIZE) {
+			/*
+			 * return one empty item back for v1, which does not
+			 * handle -EOVERFLOW
+			 */
+
+			*buf_size = sizeof(sh) + item_len;
+			item_len = 0;
+			ret = -EOVERFLOW;
+		}
+
+		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
 			ret = 1;
-			goto overflow;
+			goto out;
 		}
 
 		sh.objectid = key->objectid;
@@ -2005,20 +2018,33 @@
 		sh.transid = found_transid;
 
 		/* copy search result header */
-		memcpy(buf + *sk_offset, &sh, sizeof(sh));
+		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		*sk_offset += sizeof(sh);
 
 		if (item_len) {
-			char *p = buf + *sk_offset;
+			char __user *up = ubuf + *sk_offset;
 			/* copy the item */
-			read_extent_buffer(leaf, p,
-					   item_off, item_len);
+			if (read_extent_buffer_to_user(leaf, up,
+						       item_off, item_len)) {
+				ret = -EFAULT;
+				goto out;
+			}
+
 			*sk_offset += item_len;
 		}
 		(*num_found)++;
 
-		if (*num_found >= sk->nr_items)
-			break;
+		if (ret) /* -EOVERFLOW from above */
+			goto out;
+
+		if (*num_found >= sk->nr_items) {
+			ret = 1;
+			goto out;
+		}
 	}
 advance_key:
 	ret = 0;
@@ -2033,22 +2059,37 @@
 		key->objectid++;
 	} else
 		ret = 1;
-overflow:
+out:
+	/*
+	 *  0: all items from this leaf copied, continue with next
+	 *  1: * more items can be copied, but the remaining buffer is too small
+	 *     * all items were found
+	 *     Either way, it stops the loop that iterates to the next
+	 *     leaf
+	 *  -EOVERFLOW: item was too large for the buffer
+	 *  -EFAULT: could not copy extent buffer back to userspace
+	 */
 	return ret;
 }
 
 static noinline int search_ioctl(struct inode *inode,
-				 struct btrfs_ioctl_search_args *args)
+				 struct btrfs_ioctl_search_key *sk,
+				 size_t *buf_size,
+				 char __user *ubuf)
 {
 	struct btrfs_root *root;
 	struct btrfs_key key;
 	struct btrfs_path *path;
-	struct btrfs_ioctl_search_key *sk = &args->key;
 	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
 	int ret;
 	int num_found = 0;
 	unsigned long sk_offset = 0;
 
+	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
+		*buf_size = sizeof(struct btrfs_ioctl_search_header);
+		return -EOVERFLOW;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2082,14 +2123,15 @@
 				ret = 0;
 			goto err;
 		}
-		ret = copy_to_sk(root, path, &key, sk, args->buf,
+		ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
 				 &sk_offset, &num_found);
 		btrfs_release_path(path);
-		if (ret || num_found >= sk->nr_items)
+		if (ret)
 			break;
 
 	}
-	ret = 0;
+	if (ret > 0)
+		ret = 0;
 err:
 	sk->nr_items = num_found;
 	btrfs_free_path(path);
@@ -2099,22 +2141,73 @@
 static noinline int btrfs_ioctl_tree_search(struct file *file,
 					   void __user *argp)
 {
-	 struct btrfs_ioctl_search_args *args;
-	 struct inode *inode;
-	 int ret;
+	struct btrfs_ioctl_search_args __user *uargs;
+	struct btrfs_ioctl_search_key sk;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	args = memdup_user(argp, sizeof(*args));
-	if (IS_ERR(args))
-		return PTR_ERR(args);
+	uargs = (struct btrfs_ioctl_search_args __user *)argp;
+
+	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+		return -EFAULT;
+
+	buf_size = sizeof(uargs->buf);
 
 	inode = file_inode(file);
-	ret = search_ioctl(inode, args);
-	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+
+	/*
+	 * In the original implementation an overflow is handled by returning a
+	 * search header with a len of zero, so reset ret.
+	 */
+	if (ret == -EOVERFLOW)
+		ret = 0;
+
+	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
 		ret = -EFAULT;
-	kfree(args);
+	return ret;
+}
+
+static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+					       void __user *argp)
+{
+	struct btrfs_ioctl_search_args_v2 __user *uarg;
+	struct btrfs_ioctl_search_args_v2 args;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
+	const size_t buf_limit = 16 * 1024 * 1024;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* copy search header and buffer size */
+	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	buf_size = args.buf_size;
+
+	if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (buf_size > buf_limit)
+		buf_size = buf_limit;
+
+	inode = file_inode(file);
+	ret = search_ioctl(inode, &args.key, &buf_size,
+			   (char __user *)(&uarg->buf[0]));
+	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+		ret = -EFAULT;
+	else if (ret == -EOVERFLOW &&
+		copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+		ret = -EFAULT;
+
 	return ret;
 }
 
@@ -5198,6 +5291,8 @@
 		return btrfs_ioctl_trans_end(file);
 	case BTRFS_IOC_TREE_SEARCH:
 		return btrfs_ioctl_tree_search(file, argp);
+	case BTRFS_IOC_TREE_SEARCH_V2:
+		return btrfs_ioctl_tree_search_v2(file, argp);
 	case BTRFS_IOC_INO_LOOKUP:
 		return btrfs_ioctl_ino_lookup(file, argp);
 	case BTRFS_IOC_INO_PATHS:
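
From user space, the new BTRFS_IOC_TREE_SEARCH_V2 removes v1's fixed buffer limit: the caller supplies buf_size and, on -EOVERFLOW, reads back the size needed for a single item. A hedged sketch of a caller, assuming the v2 args layout this patch consumes (search key, a buf_size field, then the flexible result buffer) is exposed via linux/btrfs.h:

    #include <linux/btrfs.h>
    #include <sys/ioctl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    /* Search every key of tree 1 (the root tree) with a 64 KiB buffer. */
    static int search_v2(int fd)
    {
            size_t buf_size = 64 * 1024;
            struct btrfs_ioctl_search_args_v2 *args;

            args = calloc(1, sizeof(*args) + buf_size);
            if (!args)
                    return -1;

            args->key.tree_id = 1;
            args->key.max_objectid = (__u64)-1;
            args->key.max_offset = (__u64)-1;
            args->key.max_transid = (__u64)-1;
            args->key.max_type = (__u32)-1;
            args->key.nr_items = (__u32)-1;
            args->buf_size = buf_size;

            if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
                    if (errno == EOVERFLOW)
                            fprintf(stderr, "need %llu bytes for one item\n",
                                    (unsigned long long)args->buf_size);
                    free(args);
                    return -1;
            }

            printf("copied %u items\n", args->key.nr_items);
            free(args);
            return 0;
    }
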
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index cf5aead..98cb6b2 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1798,8 +1798,10 @@
 		return -ENOMEM;
 
 	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
+	if (!tmp) {
+		ulist_free(qgroups);
 		return -ENOMEM;
+	}
 
 	btrfs_get_tree_mod_seq(fs_info, &elem);
 	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
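
The qgroup fix above is the classic partial-initialization leak: the first ulist must be freed when the second allocation fails. In larger functions the kernel usually expresses this as a goto-unwind ladder; a minimal sketch in plain C (hypothetical names):

    #include <stdlib.h>

    struct ulist_like { int dummy; };

    /* Allocate two objects; on failure of the second, unwind the first. */
    static int setup_pair(struct ulist_like **a_out, struct ulist_like **b_out)
    {
            struct ulist_like *a, *b;

            a = calloc(1, sizeof(*a));
            if (!a)
                    return -1;

            b = calloc(1, sizeof(*b));
            if (!b)
                    goto free_a;

            *a_out = a;
            *b_out = b;
            return 0;

    free_a:
            free(a);
            return -1;
    }
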
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 30947f9..09230cf 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -428,8 +428,13 @@
 			continue;
 		}
 		if (!dev->bdev) {
-			/* cannot read ahead on missing device */
-			continue;
+			/*
+			 * cannot read ahead on a missing device, but for
+			 * RAID5/6 REQ_GET_READ_MIRRORS returns 1, so don't
+			 * skip the missing device in that case.
+			 */
+			if (nzones > 1)
+				continue;
 		}
 		if (dev_replace_is_ongoing &&
 		    dev == fs_info->dev_replace.tgtdev) {
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb..9626252 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -135,7 +135,7 @@
 	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
 		struct extent_buffer *eb;
 
-		eb = radix_tree_deref_slot(slot);
+		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
 		if (!eb)
 			continue;
 		/* Shouldn't happen but that kind of thinking creates CVE's */
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b7..ec3dcb2 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -415,6 +415,8 @@
 		ret = -ENOMEM;
 		goto out;
 	}
+	btrfs_set_header_level(root->node, 0);
+	btrfs_set_header_nritems(root->node, 0);
 	root->alloc_bytenr += 8192;
 
 	tmp_root = btrfs_alloc_dummy_root();
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9630f10..511839c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1284,11 +1284,13 @@
 		goto fail;
 	}
 
-	pending->error = btrfs_qgroup_inherit(trans, fs_info,
-					      root->root_key.objectid,
-					      objectid, pending->inherit);
-	if (pending->error)
-		goto no_free_objectid;
+	ret = btrfs_qgroup_inherit(trans, fs_info,
+				   root->root_key.objectid,
+				   objectid, pending->inherit);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto fail;
+	}
 
 	/* see comments in should_cow_block() */
 	set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d6..469f2e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -104,12 +104,6 @@
 	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 	struct dentry *dentry;
 
-	if (acl) {
-		ret = posix_acl_valid(acl);
-		if (ret < 0)
-			goto out;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4f3f690..90b3954 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -211,18 +211,15 @@
 		SetPageError(page);
 		ceph_fscache_readpage_cancel(inode, page);
 		goto out;
-	} else {
-		if (err < PAGE_CACHE_SIZE) {
-		/* zero fill remainder of page */
-			zero_user_segment(page, err, PAGE_CACHE_SIZE);
-		} else {
-			flush_dcache_page(page);
-		}
 	}
-	SetPageUptodate(page);
+	if (err < PAGE_CACHE_SIZE)
+		/* zero fill remainder of page */
+		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+	else
+		flush_dcache_page(page);
 
-	if (err >= 0)
-		ceph_readpage_to_fscache(inode, page);
+	SetPageUptodate(page);
+	ceph_readpage_to_fscache(inode, page);
 
 out:
 	return err < 0 ? err : 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b62..1fde164 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -221,8 +221,8 @@
 	return 0;
 }
 
-static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
-				struct ceph_cap_reservation *ctx)
+struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+			      struct ceph_cap_reservation *ctx)
 {
 	struct ceph_cap *cap = NULL;
 
@@ -508,15 +508,14 @@
  * it is < 0.  (This is so we can atomically add the cap and add an
  * open file reference to it.)
  */
-int ceph_add_cap(struct inode *inode,
-		 struct ceph_mds_session *session, u64 cap_id,
-		 int fmode, unsigned issued, unsigned wanted,
-		 unsigned seq, unsigned mseq, u64 realmino, int flags,
-		 struct ceph_cap_reservation *caps_reservation)
+void ceph_add_cap(struct inode *inode,
+		  struct ceph_mds_session *session, u64 cap_id,
+		  int fmode, unsigned issued, unsigned wanted,
+		  unsigned seq, unsigned mseq, u64 realmino, int flags,
+		  struct ceph_cap **new_cap)
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *new_cap = NULL;
 	struct ceph_cap *cap;
 	int mds = session->s_mds;
 	int actual_wanted;
@@ -531,20 +530,10 @@
 	if (fmode >= 0)
 		wanted |= ceph_caps_for_mode(fmode);
 
-retry:
-	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
 	if (!cap) {
-		if (new_cap) {
-			cap = new_cap;
-			new_cap = NULL;
-		} else {
-			spin_unlock(&ci->i_ceph_lock);
-			new_cap = get_cap(mdsc, caps_reservation);
-			if (new_cap == NULL)
-				return -ENOMEM;
-			goto retry;
-		}
+		cap = *new_cap;
+		*new_cap = NULL;
 
 		cap->issued = 0;
 		cap->implemented = 0;
@@ -562,9 +551,6 @@
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
 	} else {
-		if (new_cap)
-			ceph_put_cap(mdsc, new_cap);
-
 		/*
 		 * auth mds of the inode changed. we received the cap export
 		 * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@
 			ci->i_auth_cap = cap;
 			cap->mds_wanted = wanted;
 		}
-		ci->i_cap_exporting_issued = 0;
 	} else {
 		WARN_ON(ci->i_auth_cap == cap);
 	}
@@ -648,9 +633,6 @@
 
 	if (fmode >= 0)
 		__ceph_get_fmode(ci, fmode);
-	spin_unlock(&ci->i_ceph_lock);
-	wake_up_all(&ci->i_cap_wq);
-	return 0;
 }
 
 /*
@@ -685,7 +667,7 @@
  */
 int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
 {
-	int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+	int have = ci->i_snap_caps;
 	struct ceph_cap *cap;
 	struct rb_node *p;
 
@@ -900,7 +882,7 @@
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
-	return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+	return !RB_EMPTY_ROOT(&ci->i_caps);
 }
 
 int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@
  * actually be a revocation if it specifies a smaller cap set.)
  *
  * caller holds s_mutex and i_ceph_lock, we drop both.
- *
- * return value:
- *  0 - ok
- *  1 - check_caps on auth cap only (writeback)
- *  2 - check_caps (ack revoke)
  */
-static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+static void handle_cap_grant(struct ceph_mds_client *mdsc,
+			     struct inode *inode, struct ceph_mds_caps *grant,
+			     void *snaptrace, int snaptrace_len,
+			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap,
-			     struct ceph_buffer *xattr_buf)
-		__releases(ci->i_ceph_lock)
+			     struct ceph_cap *cap, int issued)
+	__releases(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	int seq = le32_to_cpu(grant->seq);
 	int newcaps = le32_to_cpu(grant->caps);
-	int issued, implemented, used, wanted, dirty;
+	int used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
 	u64 max_size = le64_to_cpu(grant->max_size);
 	struct timespec mtime, atime, ctime;
 	int check_caps = 0;
-	int wake = 0;
-	int writeback = 0;
-	int queue_invalidate = 0;
-	int deleted_inode = 0;
-	int queue_revalidate = 0;
+	bool wake = 0;
+	bool writeback = 0;
+	bool queue_trunc = 0;
+	bool queue_invalidate = 0;
+	bool queue_revalidate = 0;
+	bool deleted_inode = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@
 	}
 
 	/* side effects now are allowed */
-
-	issued = __ceph_caps_issued(ci, &implemented);
-	issued |= implemented | __ceph_caps_dirty(ci);
-
 	cap->cap_gen = session->s_cap_gen;
 	cap->seq = seq;
 
 	__check_cap_issue(ci, cap, newcaps);
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(grant->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
 		if (inode->i_nlink == 0 &&
 		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@
 	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
 		queue_revalidate = 1;
 
-	/* size/ctime/mtime/atime? */
-	ceph_fill_file_size(inode, issued,
-			    le32_to_cpu(grant->truncate_seq),
-			    le64_to_cpu(grant->truncate_size), size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
+	if (newcaps & CEPH_CAP_ANY_RD) {
+		/* ctime/mtime/atime? */
+		ceph_decode_timespec(&mtime, &grant->mtime);
+		ceph_decode_timespec(&atime, &grant->atime);
+		ceph_decode_timespec(&ctime, &grant->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(grant->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
 
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* max size increase? */
-	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;  /* reset */
-			ci->i_requested_max_size = 0;
+	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+		/* file layout may have changed */
+		ci->i_layout = grant->layout;
+		/* size/truncate_seq? */
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(grant->truncate_seq),
+					le64_to_cpu(grant->truncate_size),
+					size);
+		/* max size increase? */
+		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+			dout("max_size %lld -> %llu\n",
+			     ci->i_max_size, max_size);
+			ci->i_max_size = max_size;
+			if (max_size >= ci->i_wanted_max_size) {
+				ci->i_wanted_max_size = 0;  /* reset */
+				ci->i_requested_max_size = 0;
+			}
+			wake = 1;
 		}
-		wake = 1;
 	}
 
 	/* check cap bits */
@@ -2595,6 +2578,23 @@
 
 	spin_unlock(&ci->i_ceph_lock);
 
+	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+		down_write(&mdsc->snap_rwsem);
+		ceph_update_snap_trace(mdsc, snaptrace,
+				       snaptrace + snaptrace_len, false);
+		downgrade_write(&mdsc->snap_rwsem);
+		kick_flushing_inode_caps(mdsc, session, inode);
+		up_read(&mdsc->snap_rwsem);
+		if (newcaps & ~issued)
+			wake = 1;
+	}
+
+	if (queue_trunc) {
+		ceph_queue_vmtruncate(inode);
+		ceph_queue_revalidate(inode);
+	} else if (queue_revalidate)
+		ceph_queue_revalidate(inode);
+
 	if (writeback)
 		/*
 		 * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@
 		ceph_queue_invalidate(inode);
 	if (deleted_inode)
 		invalidate_aliases(inode);
-	if (queue_revalidate)
-		ceph_queue_revalidate(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
@@ -2784,7 +2782,7 @@
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *tsession = NULL;
-	struct ceph_cap *cap, *tcap;
+	struct ceph_cap *cap, *tcap, *new_cap = NULL;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 t_cap_id;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
-	if (!cap)
+	if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
 		goto out_unlock;
 
 	if (target < 0) {
@@ -2846,15 +2844,14 @@
 		}
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
-	}
-
-	if (tsession) {
-		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
-		spin_unlock(&ci->i_ceph_lock);
+	} else if (tsession) {
 		/* add placeholder for the export tagert */
+		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
 		ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
-			     t_seq - 1, t_mseq, (u64)-1, flag, NULL);
-		goto retry;
+			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+
+		__ceph_remove_cap(cap, false);
+		goto out_unlock;
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@
 					  SINGLE_DEPTH_NESTING);
 		}
 		ceph_add_cap_releases(mdsc, tsession);
+		new_cap = ceph_get_cap(mdsc, NULL);
 	} else {
 		WARN_ON(1);
 		tsession = NULL;
@@ -2887,24 +2885,27 @@
 		mutex_unlock(&tsession->s_mutex);
 		ceph_put_mds_session(tsession);
 	}
+	if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 }
 
 /*
- * Handle cap IMPORT.  If there are temp bits from an older EXPORT,
- * clean them up.
+ * Handle cap IMPORT.
  *
- * caller holds s_mutex.
+ * caller holds s_mutex. acquires i_ceph_lock
  */
 static void handle_cap_import(struct ceph_mds_client *mdsc,
 			      struct inode *inode, struct ceph_mds_caps *im,
 			      struct ceph_mds_cap_peer *ph,
 			      struct ceph_mds_session *session,
-			      void *snaptrace, int snaptrace_len)
+			      struct ceph_cap **target_cap, int *old_issued)
+	__acquires(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *cap;
+	struct ceph_cap *cap, *ocap, *new_cap = NULL;
 	int mds = session->s_mds;
-	unsigned issued = le32_to_cpu(im->caps);
+	int issued;
+	unsigned caps = le32_to_cpu(im->caps);
 	unsigned wanted = le32_to_cpu(im->wanted);
 	unsigned seq = le32_to_cpu(im->seq);
 	unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@
 	dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
 	     inode, ci, mds, mseq, peer);
 
+retry:
 	spin_lock(&ci->i_ceph_lock);
-	cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
-	if (cap && cap->cap_id == p_cap_id) {
+	cap = __get_cap_for_mds(ci, mds);
+	if (!cap) {
+		if (!new_cap) {
+			spin_unlock(&ci->i_ceph_lock);
+			new_cap = ceph_get_cap(mdsc, NULL);
+			goto retry;
+		}
+		cap = new_cap;
+	} else {
+		if (new_cap) {
+			ceph_put_cap(mdsc, new_cap);
+			new_cap = NULL;
+		}
+	}
+
+	__ceph_caps_issued(ci, &issued);
+	issued |= __ceph_caps_dirty(ci);
+
+	ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
+
+	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+	if (ocap && ocap->cap_id == p_cap_id) {
 		dout(" remove export cap %p mds%d flags %d\n",
-		     cap, peer, ph->flags);
+		     ocap, peer, ph->flags);
 		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
-		    (cap->seq != le32_to_cpu(ph->seq) ||
-		     cap->mseq != le32_to_cpu(ph->mseq))) {
+		    (ocap->seq != le32_to_cpu(ph->seq) ||
+		     ocap->mseq != le32_to_cpu(ph->mseq))) {
 			pr_err("handle_cap_import: mismatched seq/mseq: "
 			       "ino (%llx.%llx) mds%d seq %d mseq %d "
 			       "importer mds%d has peer seq %d mseq %d\n",
-			       ceph_vinop(inode), peer, cap->seq,
-			       cap->mseq, mds, le32_to_cpu(ph->seq),
+			       ceph_vinop(inode), peer, ocap->seq,
+			       ocap->mseq, mds, le32_to_cpu(ph->seq),
 			       le32_to_cpu(ph->mseq));
 		}
-		ci->i_cap_exporting_issued = cap->issued;
-		__ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	/* make sure we re-request max_size, if necessary */
 	ci->i_wanted_max_size = 0;
 	ci->i_requested_max_size = 0;
-	spin_unlock(&ci->i_ceph_lock);
 
-	down_write(&mdsc->snap_rwsem);
-	ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
-			       false);
-	downgrade_write(&mdsc->snap_rwsem);
-	ceph_add_cap(inode, session, cap_id, -1,
-		     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
-		     NULL /* no caps context */);
-	kick_flushing_inode_caps(mdsc, session, inode);
-	up_read(&mdsc->snap_rwsem);
-
+	*old_issued = issued;
+	*target_cap = cap;
 }
 
 /*
@@ -2977,7 +2990,7 @@
 	struct ceph_mds_caps *h;
 	struct ceph_mds_cap_peer *peer = NULL;
 	int mds = session->s_mds;
-	int op;
+	int op, issued;
 	u32 seq, mseq;
 	struct ceph_vino vino;
 	u64 cap_id;
@@ -3069,7 +3082,10 @@
 
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, peer, session,
-				  snaptrace, snaptrace_len);
+				  &cap, &issued);
+		handle_cap_grant(mdsc, inode, h,  snaptrace, snaptrace_len,
+				 msg->middle, session, cap, issued);
+		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -3086,8 +3102,10 @@
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
-	case CEPH_CAP_OP_IMPORT:
-		handle_cap_grant(inode, h, session, cap, msg->middle);
+		__ceph_caps_issued(ci, &issued);
+		issued |= __ceph_caps_dirty(ci);
+		handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
+				 session, cap, issued);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6..8d7d782 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -169,7 +169,7 @@
 	return dentry;
 }
 
-struct dentry *ceph_get_parent(struct dentry *child)
+static struct dentry *ceph_get_parent(struct dentry *child)
 {
 	/* don't re-export snaps */
 	if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9f..04c89c2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -10,6 +10,7 @@
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
+#include <linux/random.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -179,9 +180,8 @@
  * specified, copy the frag delegation info to the caller if
  * it is present.
  */
-u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-		     struct ceph_inode_frag *pfrag,
-		     int *found)
+static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+			      struct ceph_inode_frag *pfrag, int *found)
 {
 	u32 t = ceph_frag_make(0, 0);
 	struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@
 	if (found)
 		*found = 0;
 
-	mutex_lock(&ci->i_fragtree_mutex);
 	while (1) {
 		WARN_ON(!ceph_frag_contains_value(t, v));
 		frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@
 	}
 	dout("choose_frag(%x) = %x\n", v, t);
 
-	mutex_unlock(&ci->i_fragtree_mutex);
 	return t;
 }
 
+u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+		     struct ceph_inode_frag *pfrag, int *found)
+{
+	u32 ret;
+	mutex_lock(&ci->i_fragtree_mutex);
+	ret = __ceph_choose_frag(ci, v, pfrag, found);
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return ret;
+}
+
 /*
  * Process dirfrag (delegation) info from the mds.  Include leaf
  * fragment in tree ONLY if ndist > 0.  Otherwise, only
@@ -237,11 +245,17 @@
 	u32 id = le32_to_cpu(dirinfo->frag);
 	int mds = le32_to_cpu(dirinfo->auth);
 	int ndist = le32_to_cpu(dirinfo->ndist);
+	int diri_auth = -1;
 	int i;
 	int err = 0;
 
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_auth_cap)
+		diri_auth = ci->i_auth_cap->mds;
+	spin_unlock(&ci->i_ceph_lock);
+
 	mutex_lock(&ci->i_fragtree_mutex);
-	if (ndist == 0) {
+	if (ndist == 0 && mds == diri_auth) {
 		/* no delegation info needed. */
 		frag = __ceph_find_frag(ci, id);
 		if (!frag)
@@ -286,6 +300,75 @@
 	return err;
 }
 
+static int ceph_fill_fragtree(struct inode *inode,
+			      struct ceph_frag_tree_head *fragtree,
+			      struct ceph_mds_reply_dirfrag *dirinfo)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
+	int i;
+	u32 id, nsplits;
+	bool update = false;
+
+	mutex_lock(&ci->i_fragtree_mutex);
+	nsplits = le32_to_cpu(fragtree->nsplits);
+	if (nsplits) {
+		i = prandom_u32() % nsplits;
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		if (!__ceph_find_frag(ci, id))
+			update = true;
+	} else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
+		rb_node = rb_first(&ci->i_fragtree);
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
+			update = true;
+	}
+	if (!update && dirinfo) {
+		id = le32_to_cpu(dirinfo->frag);
+		if (id != __ceph_choose_frag(ci, id, NULL, NULL))
+			update = true;
+	}
+	if (!update)
+		goto out_unlock;
+
+	dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode));
+	rb_node = rb_first(&ci->i_fragtree);
+	for (i = 0; i < nsplits; i++) {
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
+		frag->split_by = le32_to_cpu(fragtree->splits[i].by);
+		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
+	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
+out_unlock:
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return 0;
+}
 
 /*
  * initialize a newly allocated inode.
@@ -341,7 +424,6 @@
 	INIT_LIST_HEAD(&ci->i_cap_snaps);
 	ci->i_head_snapc = NULL;
 	ci->i_snap_caps = 0;
-	ci->i_cap_exporting_issued = 0;
 
 	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
 		ci->i_nr_by_mode[i] = 0;
@@ -407,7 +489,7 @@
 
 	/*
 	 * we may still have a snap_realm reference if there are stray
-	 * caps in i_cap_exporting_issued or i_snap_caps.
+	 * caps in i_snap_caps.
 	 */
 	if (ci->i_snap_realm) {
 		struct ceph_mds_client *mdsc =
@@ -582,22 +664,26 @@
 		      unsigned long ttl_from, int cap_fmode,
 		      struct ceph_cap_reservation *caps_reservation)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int i;
-	int issued = 0, implemented;
+	int issued = 0, implemented, new_issued;
 	struct timespec mtime, atime, ctime;
-	u32 nsplits;
-	struct ceph_inode_frag *frag;
-	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
+	struct ceph_cap *new_cap = NULL;
 	int err = 0;
-	int queue_trunc = 0;
+	bool wake = false;
+	bool queue_trunc = false;
+	bool new_version = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
 	     ci->i_version);
 
+	/* prealloc new cap struct */
+	if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
+		new_cap = ceph_get_cap(mdsc, caps_reservation);
+
 	/*
 	 * prealloc xattr data, if it looks like we'll need it.  only
 	 * if len > 4 (meaning there are actually xattrs; the first 4
@@ -623,19 +709,23 @@
 	 *   3    2     skip
 	 *   3    3     update
 	 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-	
+	if (ci->i_version == 0 ||
+	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
+		new_version = true;
+
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
+	new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
 	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
+	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +734,35 @@
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0)
 		set_nlink(inode, le32_to_cpu(info->nlink));
 
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
+	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+		/* be careful with mtime, atime, size */
+		ceph_decode_timespec(&atime, &info->atime);
+		ceph_decode_timespec(&mtime, &info->mtime);
+		ceph_decode_timespec(&ctime, &info->ctime);
+		ceph_fill_file_time(inode, issued,
+				le32_to_cpu(info->time_warp_seq),
+				&ctime, &mtime, &atime);
+	}
 
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+	if (new_version ||
+	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		ci->i_layout = info->layout;
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(info->truncate_seq),
+					le64_to_cpu(info->truncate_size),
+					le64_to_cpu(info->size));
+		/* only update max_size on auth cap */
+		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+		    ci->i_max_size != le64_to_cpu(info->max_size)) {
+			dout("max_size %lld -> %llu\n", ci->i_max_size,
+					le64_to_cpu(info->max_size));
+			ci->i_max_size = le64_to_cpu(info->max_size);
+		}
+	}
 
 	/* xattrs */
 	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,58 +847,6 @@
 		dout(" marking %p complete (empty)\n", inode);
 		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
 	}
-no_change:
-	/* only update max_size on auth cap */
-	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-	    ci->i_max_size != le64_to_cpu(info->max_size)) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size,
-		     le64_to_cpu(info->max_size));
-		ci->i_max_size = le64_to_cpu(info->max_size);
-	}
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	/* queue truncate if we saw i_size decrease */
-	if (queue_trunc)
-		ceph_queue_vmtruncate(inode);
-
-	/* populate frag tree */
-	/* FIXME: move me up, if/when version reflects fragtree changes */
-	nsplits = le32_to_cpu(info->fragtree.nsplits);
-	mutex_lock(&ci->i_fragtree_mutex);
-	rb_node = rb_first(&ci->i_fragtree);
-	for (i = 0; i < nsplits; i++) {
-		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		frag = NULL;
-		while (rb_node) {
-			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
-			if (ceph_frag_compare(frag->frag, id) >= 0) {
-				if (frag->frag != id)
-					frag = NULL;
-				else
-					rb_node = rb_next(rb_node);
-				break;
-			}
-			rb_node = rb_next(rb_node);
-			rb_erase(&frag->node, &ci->i_fragtree);
-			kfree(frag);
-			frag = NULL;
-		}
-		if (!frag) {
-			frag = __get_or_create_frag(ci, id);
-			if (IS_ERR(frag))
-				continue;
-		}
-		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
-		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
-	}
-	while (rb_node) {
-		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
-		rb_node = rb_next(rb_node);
-		rb_erase(&frag->node, &ci->i_fragtree);
-		kfree(frag);
-	}
-	mutex_unlock(&ci->i_fragtree_mutex);
 
 	/* were we issued a capability? */
 	if (info->cap.caps) {
@@ -809,30 +859,41 @@
 				     le32_to_cpu(info->cap.seq),
 				     le32_to_cpu(info->cap.mseq),
 				     le64_to_cpu(info->cap.realm),
-				     info->cap.flags,
-				     caps_reservation);
+				     info->cap.flags, &new_cap);
+			wake = true;
 		} else {
-			spin_lock(&ci->i_ceph_lock);
 			dout(" %p got snap_caps %s\n", inode,
 			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
 			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
 			if (cap_fmode >= 0)
 				__ceph_get_fmode(ci, cap_fmode);
-			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (cap_fmode >= 0) {
 		pr_warn("mds issued no caps on %llx.%llx\n",
 			   ceph_vinop(inode));
 		__ceph_get_fmode(ci, cap_fmode);
 	}
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (wake)
+		wake_up_all(&ci->i_cap_wq);
+
+	/* queue truncate if we saw i_size decrease */
+	if (queue_trunc)
+		ceph_queue_vmtruncate(inode);
+
+	/* populate frag tree */
+	if (S_ISDIR(inode->i_mode))
+		ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
 
 	/* update delegation info? */
 	if (dirinfo)
 		ceph_fill_dirfrag(inode, dirinfo);
 
 	err = 0;
-
 out:
+	if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 	if (xattr_blob)
 		ceph_buffer_put(xattr_blob);
 	return err;
@@ -1485,7 +1546,7 @@
 	orig_gen = ci->i_rdcache_gen;
 	spin_unlock(&ci->i_ceph_lock);
 
-	truncate_inode_pages(inode->i_mapping, 0);
+	truncate_pagecache(inode, 0);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (orig_gen == ci->i_rdcache_gen &&
@@ -1588,7 +1649,7 @@
 	     ci->i_truncate_pending, to);
 	spin_unlock(&ci->i_ceph_lock);
 
-	truncate_inode_pages(inode->i_mapping, to);
+	truncate_pagecache(inode, to);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (to == ci->i_truncate_size) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98..92a2548 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1558,6 +1558,8 @@
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
+	req->r_stamp = CURRENT_TIME;
+
 	req->r_op = op;
 	req->r_direct_mode = mode;
 	return req;
@@ -1783,7 +1785,8 @@
 	}
 
 	len = sizeof(*head) +
-		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64));
+		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+		sizeof(struct timespec);
 
 	/* calculate (max) length for cap releases */
 	len += sizeof(struct ceph_mds_request_release) *
@@ -1800,6 +1803,7 @@
 		goto out_free2;
 	}
 
+	msg->hdr.version = 2;
 	msg->hdr.tid = cpu_to_le64(req->r_tid);
 
 	head = msg->front.iov_base;
@@ -1836,6 +1840,9 @@
 		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
 	head->num_releases = cpu_to_le16(releases);
 
+	/* time stamp */
+	ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+
 	BUG_ON(p > end);
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfcc..e00737c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -194,6 +194,7 @@
 	int r_fmode;        /* file mode, if expecting cap */
 	kuid_t r_uid;
 	kgid_t r_gid;
+	struct timespec r_stamp;
 
 	/* for choosing which mds to send this request to */
 	int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc..12b2074 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -292,7 +292,6 @@
 	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
 						    dirty|flushing caps */
 	unsigned i_snap_caps;           /* cap bits for snapped files */
-	unsigned i_cap_exporting_issued;
 
 	int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
 
@@ -775,11 +774,13 @@
 extern const char *ceph_cap_string(int c);
 extern void ceph_handle_caps(struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
-extern int ceph_add_cap(struct inode *inode,
-			struct ceph_mds_session *session, u64 cap_id,
-			int fmode, unsigned issued, unsigned wanted,
-			unsigned cap, unsigned seq, u64 realmino, int flags,
-			struct ceph_cap_reservation *caps_reservation);
+extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+				     struct ceph_cap_reservation *ctx);
+extern void ceph_add_cap(struct inode *inode,
+			 struct ceph_mds_session *session, u64 cap_id,
+			 int fmode, unsigned issued, unsigned wanted,
+			 unsigned cap, unsigned seq, u64 realmino, int flags,
+			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e5b453..d08e079 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -617,6 +617,11 @@
 	int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
 
 	log_print("Retry sending %d bytes to node id %d", len, nodeid);
+
+	if (!nodeid) {
+		log_print("Shouldn't resend data via listening connection.");
+		return;
+	}
 
 	con = nodeid2con(nodeid, 0);
 	if (!con) {
diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa..a3d33fe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@
  * so that a new one can be started
  */
 
-void set_task_comm(struct task_struct *tsk, const char *buf)
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
 	task_lock(tsk);
 	trace_task_rename(tsk, buf);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
-	perf_event_comm(tsk);
+	perf_event_comm(tsk, exec);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -1110,7 +1110,8 @@
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
-	set_task_comm(current, kbasename(bprm->filename));
+	perf_event_exec();
+	__set_task_comm(current, kbasename(bprm->filename), true);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
new file mode 100644
index 0000000..6383d54
--- /dev/null
+++ b/include/asm-generic/qrwlock.h
@@ -0,0 +1,166 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QRWLOCK_H
+#define __ASM_GENERIC_QRWLOCK_H
+
+#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+#include <asm-generic/qrwlock_types.h>
+
+/*
+ * Writer states & reader shift and bias
+ */
+#define	_QW_WAITING	1		/* A writer is waiting	   */
+#define	_QW_LOCKED	0xff		/* A writer holds the lock */
+#define	_QW_WMASK	0xff		/* Writer mask		   */
+#define	_QR_SHIFT	8		/* Reader count shift	   */
+#define _QR_BIAS	(1U << _QR_SHIFT)
+
+/*
+ * External function declarations
+ */
+extern void queue_read_lock_slowpath(struct qrwlock *lock);
+extern void queue_write_lock_slowpath(struct qrwlock *lock);
+
+/**
+ * queue_read_can_lock - would read_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline int queue_read_can_lock(struct qrwlock *lock)
+{
+	return !(atomic_read(&lock->cnts) & _QW_WMASK);
+}
+
+/**
+ * queue_write_can_lock - would write_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline int queue_write_can_lock(struct qrwlock *lock)
+{
+	return !atomic_read(&lock->cnts);
+}
+
+/**
+ * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_read_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (likely(!(cnts & _QW_WMASK))) {
+		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		if (likely(!(cnts & _QW_WMASK)))
+			return 1;
+		atomic_sub(_QR_BIAS, &lock->cnts);
+	}
+	return 0;
+}
+
+/**
+ * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_write_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (unlikely(cnts))
+		return 0;
+
+	return likely(atomic_cmpxchg(&lock->cnts,
+				     cnts, cnts | _QW_LOCKED) == cnts);
+}
+/**
+ * queue_read_lock - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queue_read_lock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	if (likely(!(cnts & _QW_WMASK)))
+		return;
+
+	/* The slowpath will decrement the reader count, if necessary. */
+	queue_read_lock_slowpath(lock);
+}
+
+/**
+ * queue_write_lock - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_write_lock(struct qrwlock *lock)
+{
+	/* Optimize for the unfair lock case where the fair flag is 0. */
+	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+		return;
+
+	queue_write_lock_slowpath(lock);
+}
+
+/**
+ * queue_read_unlock - release read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_read_unlock(struct qrwlock *lock)
+{
+	/*
+	 * Atomically decrement the reader count
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QR_BIAS, &lock->cnts);
+}
+
+#ifndef queue_write_unlock
+/**
+ * queue_write_unlock - release write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_write_unlock(struct qrwlock *lock)
+{
+	/*
+	 * If the writer field is atomic, it can be cleared directly.
+	 * Otherwise, an atomic subtraction will be used to clear it.
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QW_LOCKED, &lock->cnts);
+}
+#endif
+
+/*
+ * Remapping rwlock architecture-specific functions to the corresponding
+ * queue rwlock functions.
+ */
+#define arch_read_can_lock(l)	queue_read_can_lock(l)
+#define arch_write_can_lock(l)	queue_write_can_lock(l)
+#define arch_read_lock(l)	queue_read_lock(l)
+#define arch_write_lock(l)	queue_write_lock(l)
+#define arch_read_trylock(l)	queue_read_trylock(l)
+#define arch_write_trylock(l)	queue_write_trylock(l)
+#define arch_read_unlock(l)	queue_read_unlock(l)
+#define arch_write_unlock(l)	queue_write_unlock(l)
+
+#endif /* __ASM_GENERIC_QRWLOCK_H */
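
The reader fastpath in queue_read_trylock()/queue_read_lock() above is optimistic: add _QR_BIAS first, then check for a writer and roll the count back if one is present. A user-space sketch of just that counting logic with C11 atomics (no slowpath queueing, so this is a simplification, not the kernel algorithm):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define QW_WMASK 0xffu          /* low byte holds the writer state */
    #define QR_BIAS  (1u << 8)      /* one reader */

    struct qrw { atomic_uint cnts; };

    static bool read_trylock(struct qrw *l)
    {
            unsigned int c = atomic_load(&l->cnts);

            if (c & QW_WMASK)
                    return false;
            /* optimistically count ourselves in ... */
            c = atomic_fetch_add(&l->cnts, QR_BIAS) + QR_BIAS;
            if (!(c & QW_WMASK))
                    return true;
            /* ... and back out if a writer arrived in between */
            atomic_fetch_sub(&l->cnts, QR_BIAS);
            return false;
    }

    static void read_unlock(struct qrw *l)
    {
            atomic_fetch_sub(&l->cnts, QR_BIAS);
    }

    /* A writer needs the whole word to be 0: no readers, no writer. */
    static bool write_trylock(struct qrw *l)
    {
            unsigned int expected = 0;

            return atomic_compare_exchange_strong(&l->cnts, &expected,
                                                  QW_WMASK);
    }
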
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
new file mode 100644
index 0000000..4d76f24
--- /dev/null
+++ b/include/asm-generic/qrwlock_types.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H
+#define __ASM_GENERIC_QRWLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/spinlock_types.h>
+
+/*
+ * The queue read/write lock data structure
+ */
+
+typedef struct qrwlock {
+	atomic_t		cnts;
+	arch_spinlock_t		lock;
+} arch_rwlock_t;
+
+#define	__ARCH_RW_LOCK_UNLOCKED {		\
+	.cnts = ATOMIC_INIT(0),			\
+	.lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
+}
+
+#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d647637..471ba48 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -109,6 +109,15 @@
 #define BRANCH_PROFILE()
 #endif
 
+#ifdef CONFIG_KPROBES
+#define KPROBE_BLACKLIST()	. = ALIGN(8);				      \
+				VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+				*(_kprobe_blacklist)			      \
+				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+#else
+#define KPROBE_BLACKLIST()
+#endif
+
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
@@ -478,6 +487,7 @@
 	*(.init.rodata)							\
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
+	KPROBE_BLACKLIST()						\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
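
KPROBE_BLACKLIST() gives the kprobes core an array delimited by the two VMLINUX_SYMBOL()s: every object placed in the _kprobe_blacklist section lands between __start_kprobe_blacklist and __stop_kprobe_blacklist. The generic start/stop-symbol table pattern, sketched with hypothetical entry and macro names:

    /* Use sites emit an entry into the section ... */
    struct blacklist_entry {
            unsigned long addr;
    };

    #define BLACKLIST(fn)                                           \
            static struct blacklist_entry __entry_##fn              \
            __attribute__((__section__("_kprobe_blacklist"),        \
                           __used__)) = { (unsigned long)fn }

    /* ... and the linker script's symbols bound the resulting array. */
    extern struct blacklist_entry __start_kprobe_blacklist[];
    extern struct blacklist_entry __stop_kprobe_blacklist[];

    static int addr_blacklisted(unsigned long addr)
    {
            struct blacklist_entry *e;

            for (e = __start_kprobe_blacklist;
                 e < __stop_kprobe_blacklist; e++)
                    if (e->addr == addr)
                            return 1;
            return 0;
    }
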
diff --git a/include/dt-bindings/clk/ti-dra7-atl.h b/include/dt-bindings/clk/ti-dra7-atl.h
new file mode 100644
index 0000000..42dd416
--- /dev/null
+++ b/include/dt-bindings/clk/ti-dra7-atl.h
@@ -0,0 +1,40 @@
+/*
+ * This header provides constants for DRA7 ATL (Audio Tracking Logic)
+ *
+ * The constants defined in this header are used in dts files
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc.
+ *
+ * Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_DRA7_ATL_H
+#define _DT_BINDINGS_CLK_DRA7_ATL_H
+
+#define DRA7_ATL_WS_MCASP1_FSR		0
+#define DRA7_ATL_WS_MCASP1_FSX		1
+#define DRA7_ATL_WS_MCASP2_FSR		2
+#define DRA7_ATL_WS_MCASP2_FSX		3
+#define DRA7_ATL_WS_MCASP3_FSX		4
+#define DRA7_ATL_WS_MCASP4_FSX		5
+#define DRA7_ATL_WS_MCASP5_FSX		6
+#define DRA7_ATL_WS_MCASP6_FSX		7
+#define DRA7_ATL_WS_MCASP7_FSX		8
+#define DRA7_ATL_WS_MCASP8_FSX		9
+#define DRA7_ATL_WS_MCASP8_AHCLKX	10
+#define DRA7_ATL_WS_XREF_CLK3		11
+#define DRA7_ATL_WS_XREF_CLK0		12
+#define DRA7_ATL_WS_XREF_CLK1		13
+#define DRA7_ATL_WS_XREF_CLK2		14
+#define DRA7_ATL_WS_OSC1_X1		15
+
+#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 5f6db18..3c97d5e 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -625,6 +625,8 @@
 			   CEPH_CAP_LINK_EXCL |		\
 			   CEPH_CAP_XATTR_EXCL |	\
 			   CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \
+			      CEPH_CAP_FILE_SHARED)
 #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |	\
 			      CEPH_CAP_FILE_EXCL)
 #define CEPH_CAP_ANY_WR   (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index a486f39..deb47e4 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -40,9 +40,9 @@
 };
 
 /*
- * ceph_mon_generic_request is being used for the statfs and poolop requests
- * which are bening done a bit differently because we need to get data back
- * to the caller
+ * ceph_mon_generic_request is being used for the statfs, poolop and
+ * mon_get_version requests which are being done a bit differently
+ * because we need to get data back to the caller
  */
 struct ceph_mon_generic_request {
 	struct kref kref;
@@ -104,10 +104,15 @@
 extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
 
 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+				 unsigned long timeout);
 
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
 
+extern int ceph_monc_do_get_version(struct ceph_mon_client *monc,
+				    const char *what, u64 *newest);
+
 extern int ceph_monc_open_session(struct ceph_mon_client *monc);
 
 extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
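
A sketch of the new get-version call (illustrative; monc is an
already-initialized struct ceph_mon_client): it asks the monitors for the
newest epoch of a given map and blocks until the reply arrives:

	u64 newest = 0;
	int err = ceph_monc_do_get_version(monc, "osdmap", &newest);

	if (!err)
		pr_info("newest osdmap epoch: %llu\n",
			(unsigned long long)newest);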
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 4a21a87..e8d8a35 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -41,6 +41,8 @@
  * @idlest_reg: register containing the DPLL idle status bitfield
  * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg
  * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg
+ * @dcc_mask: mask of the DPLL DCC correction bitfield in @mult_div1_reg
+ * @dcc_rate: rate at or above which DCC @dcc_mask must be set
  * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg
  * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg
  * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg
@@ -86,6 +88,8 @@
 	u32			idlest_mask;
 	u32			dco_mask;
 	u32			sddiv_mask;
+	u32			dcc_mask;
+	unsigned long		dcc_rate;
 	u32			lpmode_mask;
 	u32			m4xen_mask;
 	u8			auto_recal_bit;
@@ -94,7 +98,26 @@
 	u8			flags;
 };
 
-struct clk_hw_omap_ops;
+struct clk_hw_omap;
+
+/**
+ * struct clk_hw_omap_ops - OMAP clk ops
+ * @find_idlest: find idlest register information for a clock
+ * @find_companion: find companion clock register information for a clock,
+ *		    basically converts CM_ICLKEN* <-> CM_FCLKEN*
+ * @allow_idle: enables autoidle hardware functionality for a clock
+ * @deny_idle: prevent autoidle hardware functionality for a clock
+ */
+struct clk_hw_omap_ops {
+	void	(*find_idlest)(struct clk_hw_omap *oclk,
+			       void __iomem **idlest_reg,
+			       u8 *idlest_bit, u8 *idlest_val);
+	void	(*find_companion)(struct clk_hw_omap *oclk,
+				  void __iomem **other_reg,
+				  u8 *other_bit);
+	void	(*allow_idle)(struct clk_hw_omap *oclk);
+	void	(*deny_idle)(struct clk_hw_omap *oclk);
+};
 
 /**
  * struct clk_hw_omap - OMAP struct clk
@@ -259,6 +282,12 @@
 void omap2_dflt_clk_disable(struct clk_hw *hw);
 int omap2_dflt_clk_is_enabled(struct clk_hw *hw);
 void omap3_clk_lock_dpll5(void);
+unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
+				    unsigned long parent_rate);
+int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
+			     unsigned long parent_rate);
+void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
+void omap2xxx_clkt_vps_init(void);
 
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
@@ -278,6 +307,8 @@
 int dra7xx_dt_clk_init(void);
 int am33xx_dt_clk_init(void);
 int am43xx_dt_clk_init(void);
+int omap2420_dt_clk_init(void);
+int omap2430_dt_clk_init(void);
 
 #ifdef CONFIG_OF
 void of_ti_clk_allow_autoidle_all(void);
@@ -287,6 +318,8 @@
 static inline void of_ti_clk_deny_autoidle_all(void) { }
 #endif
 
+extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
+extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 extern const struct clk_hw_omap_ops clkhwops_omap3_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx;
 extern const struct clk_hw_omap_ops clkhwops_wait;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 64fdfe1..d5ad7b1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -383,7 +383,9 @@
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes	__attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline	__always_inline
 #else
 # define __kprobes
+# define nokprobe_inline	inline
 #endif
 #endif /* __LINUX_COMPILER_H */
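
The intended use (a sketch; the helper below is made up, mirroring
arch_within_kprobe_blacklist() later in this series): a predicate that runs
on probe-handling paths is forced inline under CONFIG_KPROBES, so no
separately probeable symbol is ever emitted for it:

	static nokprobe_inline bool addr_in_kprobes_text(unsigned long addr)
	{
		return addr >= (unsigned long)__kprobes_text_start &&
		       addr < (unsigned long)__kprobes_text_end;
	}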
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 7bd2ad0..f7296e5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,10 +205,10 @@
 	void *addr;
 };
 
-struct kprobe_blackpoint {
-	const char *name;
+struct kprobe_blacklist_entry {
+	struct list_head list;
 	unsigned long start_addr;
-	unsigned long range;
+	unsigned long end_addr;
 };
 
 #ifdef CONFIG_KPROBES
@@ -265,6 +265,7 @@
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
@@ -476,4 +477,18 @@
 	return enable_kprobe(&jp->kp);
 }
 
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist generating macro. Specify functions which are not to be probed
+ * by using this macro.
+ */
+#define __NOKPROBE_SYMBOL(fname)			\
+static unsigned long __used				\
+	__attribute__((section("_kprobe_blacklist")))	\
+	_kbl_addr_##fname = (unsigned long)fname;
+#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)
+#else
+#define NOKPROBE_SYMBOL(fname)
+#endif
+
 #endif /* _LINUX_KPROBES_H */
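
Usage sketch (the function name is illustrative): NOKPROBE_SYMBOL() drops
the function's address into the new _kprobe_blacklist section, which
populate_kprobe_blacklist() below converts into address ranges at boot:

	static int sensitive_helper(struct pt_regs *regs)
	{
		/* must never be entered via a kprobe breakpoint */
		return 0;
	}
	NOKPROBE_SYMBOL(sensitive_helper);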
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 970c681..ec4e3bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -586,7 +586,7 @@
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
+int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a50173c..2bf4031 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _LINUX_NVME_H
@@ -66,8 +62,8 @@
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
-extern unsigned char io_timeout;
-#define NVME_IO_TIMEOUT	(io_timeout * HZ)
+extern unsigned char nvme_io_timeout;
+#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
@@ -94,7 +90,7 @@
 	struct miscdevice miscdev;
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
-	struct notifier_block nb;
+	struct work_struct cpu_work;
 	char name[12];
 	char serial[20];
 	char model[40];
@@ -103,6 +99,7 @@
 	u32 stripe_size;
 	u16 oncs;
 	u16 abort_limit;
+	u8 vwc;
 	u8 initialized;
 };
 
@@ -159,7 +156,6 @@
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
 int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
 							u32 *result);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a920911..707617a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -167,6 +167,11 @@
 #define PERF_EVENT_TXN 0x1
 
 /**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT		0x01
+
+/**
  * struct pmu - generic performance monitoring unit
  */
 struct pmu {
@@ -178,6 +183,11 @@
 	const char			*name;
 	int				type;
 
+	/*
+	 * various common per-pmu feature flags
+	 */
+	int				capabilities;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
@@ -696,7 +706,8 @@
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_exec(void);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -773,7 +784,7 @@
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
@@ -803,7 +814,8 @@
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
-static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_exec(void)				{ }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
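
A sketch of a driver using the new capability flag (other pmu callbacks
elided; the pmu name is made up): a PMU without an overflow interrupt
advertises PERF_PMU_CAP_NO_INTERRUPT, and the core then rejects sampling
events on it with -ENOTSUPP (see the kernel/events/core.c hunk further down):

	static struct pmu my_pmu = {
		.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
		/* .event_init, .add, .del, ... */
	};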
diff --git a/include/linux/platform_data/shtc1.h b/include/linux/platform_data/shtc1.h
new file mode 100644
index 0000000..7b8c353
--- /dev/null
+++ b/include/linux/platform_data/shtc1.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SHTC1_H_
+#define __SHTC1_H_
+
+struct shtc1_platform_data {
+	bool blocking_io;
+	bool high_precision;
+};
+#endif /* __SHTC1_H_ */
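
Board code would pass this to the driver through
i2c_board_info.platform_data; a sketch with illustrative values:

	static const struct shtc1_platform_data shtc1_pdata = {
		.blocking_io	= false,
		.high_precision	= true,
	};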
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d69cf63..49a4d6f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05..8d79708 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 
+struct optimistic_spin_queue;
 struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
-	long			count;
-	raw_spinlock_t		wait_lock;
-	struct list_head	wait_list;
+	long count;
+	raw_spinlock_t wait_lock;
+	struct list_head wait_list;
+#ifdef CONFIG_SMP
+	/*
+	 * Write owner. Used as a speculative check to see
+	 * if the owner is running on the cpu.
+	 */
+	struct task_struct *owner;
+	struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -55,11 +64,21 @@
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#define __RWSEM_INITIALIZER(name)			\
+	{ RWSEM_UNLOCKED_VALUE,				\
+	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
+	  LIST_HEAD_INIT((name).wait_list),		\
+	  NULL, /* owner */				\
+	  NULL /* mcs lock */                           \
+	  __RWSEM_DEP_MAP_INIT(name) }
+#else
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
 	  LIST_HEAD_INIT((name).wait_list)		\
 	  __RWSEM_DEP_MAP_INIT(name) }
+#endif
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
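
Existing declarations are unaffected by the layout change, since
DECLARE_RWSEM() picks whichever initializer matches the configuration; a
sketch (lock and function names are made up):

	static DECLARE_RWSEM(my_sem);

	static void writer(void)
	{
		down_write(&my_sem);	/* may now spin on ->owner before sleeping */
		/* ... */
		up_write(&my_sem);
	}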
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea74596..306f4f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -847,10 +847,10 @@
 };
 
 /*
- * Increase resolution of cpu_power calculations
+ * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_POWER_SHIFT	10
-#define SCHED_POWER_SCALE	(1L << SCHED_POWER_SHIFT)
+#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
  * sched-domains (multiprocessor balancing) declarations:
@@ -862,7 +862,7 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -874,7 +874,7 @@
 #ifdef CONFIG_SCHED_SMT
 static inline const int cpu_smt_flags(void)
 {
-	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
@@ -1006,7 +1006,7 @@
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
+	struct sched_group_capacity **__percpu sgc;
 };
 
 struct sched_domain_topology_level {
@@ -2173,7 +2173,7 @@
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-extern bool yield_to(struct task_struct *p, bool preempt);
+extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
 /**
@@ -2421,7 +2421,11 @@
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-extern void set_task_comm(struct task_struct *tsk, const char *from);
+extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+static inline void set_task_comm(struct task_struct *tsk, const char *from)
+{
+	__set_task_comm(tsk, from, false);
+}
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c52f827..4f844c6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -103,6 +103,7 @@
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -133,6 +134,9 @@
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
+
+#define uprobe_get_trap_addr(regs)	instruction_pointer(regs)
+
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index 4195b97..de429d1 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -35,11 +35,23 @@
 	u8 lun[8];		/* Logical Unit Number */
 	u64 tag;		/* Command identifier */
 	u8 task_attr;		/* Task attribute */
-	u8 prio;
+	u8 prio;		/* SAM command priority field */
 	u8 crn;
 	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
 } __packed;
 
+/* SCSI command request, followed by protection information */
+struct virtio_scsi_cmd_req_pi {
+	u8 lun[8];		/* Logical Unit Number */
+	u64 tag;		/* Command identifier */
+	u8 task_attr;		/* Task attribute */
+	u8 prio;		/* SAM command priority field */
+	u8 crn;
+	u32 pi_bytesout;	/* DataOUT PI Number of bytes */
+	u32 pi_bytesin;		/* DataIN PI Number of bytes */
+	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __packed;
+
 /* Response, followed by sense data and data-in */
 struct virtio_scsi_cmd_resp {
 	u32 sense_len;		/* Sense data length */
@@ -97,6 +109,7 @@
 #define VIRTIO_SCSI_F_INOUT                    0
 #define VIRTIO_SCSI_F_HOTPLUG                  1
 #define VIRTIO_SCSI_F_CHANGE                   2
+#define VIRTIO_SCSI_F_T10_PI                   3
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
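
A guest driver is expected to gate the PI-carrying request layout on the
negotiated feature bit; a sketch (vdev is the virtio device, error handling
elided):

	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_T10_PI)) {
		/* build a struct virtio_scsi_cmd_req_pi and fill
		 * pi_bytesout/pi_bytesin from the integrity payload */
	} else {
		/* legacy layout: struct virtio_scsi_cmd_req */
	}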
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index bca25dc..8fab6fa 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -432,6 +432,7 @@
 void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no);
 
 void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
+void vb2_discard_done(struct vb2_queue *q);
 int vb2_wait_for_all_buffers(struct vb2_queue *q);
 
 int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index e016e2a..42ed789 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
+#include <scsi/scsi_device.h>
 
 struct Scsi_Host;
 struct scsi_device;
@@ -315,4 +316,20 @@
 	cmd->result = (cmd->result & 0x00ffffff) | (status << 24);
 }
 
+static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
+{
+	unsigned int xfer_len = blk_rq_bytes(scmd->request);
+	unsigned int prot_op = scsi_get_prot_op(scmd);
+	unsigned int sector_size = scmd->device->sector_size;
+
+	switch (prot_op) {
+	case SCSI_PROT_NORMAL:
+	case SCSI_PROT_WRITE_STRIP:
+	case SCSI_PROT_READ_INSERT:
+		return xfer_len;
+	}
+
+	return xfer_len + (xfer_len >> ilog2(sector_size)) * 8;
+}
+
 #endif /* _SCSI_SCSI_CMND_H */
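
For the protected cases, the helper adds 8 bytes of protection information
per logical block. As a worked example (illustrative numbers): a 32 KiB
transfer on a 512-byte-sector device covers 32768 >> ilog2(512) = 64 blocks,
so 64 * 8 = 512 PI bytes, and scsi_transfer_length() returns 33280.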
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index b4d6697..d854fb3 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -932,7 +932,7 @@
 				   struct timespec *tv)
 {
 	if (runtime->tstamp_type == SNDRV_PCM_TSTAMP_TYPE_MONOTONIC)
-		do_posix_clock_monotonic_gettime(tv);
+		ktime_get_ts(tv);
 	else
 		getnstimeofday(tv);
 }
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index 33b487b..daef9da 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -70,7 +70,8 @@
 extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_tm_rsp *);
 extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *,
-				struct iscsi_text_rsp *);
+				struct iscsi_text_rsp *,
+				enum iscsit_transport_type);
 extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_reject *);
 extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *,
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 3a1c1ee..9adc1bc 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -59,6 +59,7 @@
 void	transport_subsystem_release(struct se_subsystem_api *);
 
 void	target_complete_cmd(struct se_cmd *, u8);
+void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 sense_reason_t	spc_parse_cdb(struct se_cmd *cmd, unsigned int *size);
 sense_reason_t	spc_emulate_report_luns(struct se_cmd *cmd);
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 67e1bbf..0a68d5a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -530,6 +530,26 @@
 			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
 			__entry->dst_cpu, __entry->dst_nid)
 );
+
+/*
+ * Tracepoint for waking a polling cpu without an IPI.
+ */
+TRACE_EVENT(sched_wake_idle_without_ipi,
+
+	TP_PROTO(int cpu),
+
+	TP_ARGS(cpu),
+
+	TP_STRUCT__entry(
+		__field(	int,	cpu	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+	),
+
+	TP_printk("cpu=%d", __entry->cpu)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7554fd3..6f9c38c 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -306,6 +306,14 @@
 	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
 };
 
+struct btrfs_ioctl_search_args_v2 {
+	struct btrfs_ioctl_search_key key; /* in/out - search parameters */
+	__u64 buf_size;		   /* in - size of buffer
+					    * out - on EOVERFLOW: needed size
+					    *       to store item */
+	__u64 buf[0];                       /* out - found items */
+};
+
 struct btrfs_ioctl_clone_range_args {
   __s64 src_fd;
   __u64 src_offset, src_length;
@@ -558,6 +566,8 @@
 				struct btrfs_ioctl_defrag_range_args)
 #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
 				   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+					   struct btrfs_ioctl_search_args_v2)
 #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
 				   struct btrfs_ioctl_ino_lookup_args)
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
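
A userspace sketch of the v2 call (fd is assumed to be an open descriptor on
the filesystem; the 64 KiB buffer size is arbitrary): unlike v1's fixed
in-struct buffer, the caller sizes the trailing buffer itself, and on
EOVERFLOW buf_size reports what would have been needed:

	struct btrfs_ioctl_search_args_v2 *args;
	size_t bufsz = 64 * 1024;

	args = calloc(1, sizeof(*args) + bufsz);
	args->buf_size = bufsz;
	/* fill args->key exactly as for BTRFS_IOC_TREE_SEARCH */
	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0 && errno == EOVERFLOW)
		fprintf(stderr, "need %llu bytes\n",
			(unsigned long long)args->buf_size);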
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 096fe1c..29a7d86 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _UAPI_LINUX_NVME_H
@@ -31,7 +27,12 @@
 	__u8			read_lat;
 	__u8			write_tput;
 	__u8			write_lat;
-	__u8			rsvd16[16];
+	__le16			idle_power;
+	__u8			idle_scale;
+	__u8			rsvd19;
+	__le16			active_power;
+	__u8			active_work_scale;
+	__u8			rsvd23[9];
 };
 
 enum {
@@ -49,7 +50,9 @@
 	__u8			ieee[3];
 	__u8			mic;
 	__u8			mdts;
-	__u8			rsvd78[178];
+	__u16			cntlid;
+	__u32			ver;
+	__u8			rsvd84[172];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -57,7 +60,11 @@
 	__u8			lpa;
 	__u8			elpe;
 	__u8			npss;
-	__u8			rsvd264[248];
+	__u8			avscc;
+	__u8			apsta;
+	__le16			wctemp;
+	__le16			cctemp;
+	__u8			rsvd270[242];
 	__u8			sqes;
 	__u8			cqes;
 	__u8			rsvd514[2];
@@ -68,7 +75,12 @@
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd530[1518];
+	__u8			nvscc;
+	__u8			rsvd531;
+	__le16			acwu;
+	__u8			rsvd534[2];
+	__le32			sgls;
+	__u8			rsvd540[1508];
 	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
@@ -77,6 +89,7 @@
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
 struct nvme_lbaf {
@@ -95,7 +108,15 @@
 	__u8			mc;
 	__u8			dpc;
 	__u8			dps;
-	__u8			rsvd30[98];
+	__u8			nmic;
+	__u8			rescap;
+	__u8			fpi;
+	__u8			rsvd33;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__u8			rsvd40[80];
+	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
 	__u8			rsvd192[192];
 	__u8			vs[3712];
@@ -126,7 +147,10 @@
 	__u8			unsafe_shutdowns[16];
 	__u8			media_errors[16];
 	__u8			num_err_log_entries[16];
-	__u8			rsvd192[320];
+	__le32			warning_temp_time;
+	__le32			critical_comp_time;
+	__le16			temp_sensor[8];
+	__u8			rsvd216[296];
 };
 
 enum {
@@ -282,6 +306,10 @@
 	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
 	NVME_FEAT_ASYNC_EVENT	= 0x0b,
 	NVME_FEAT_SW_PROGRESS	= 0x0c,
+	NVME_LOG_ERROR		= 0x01,
+	NVME_LOG_SMART		= 0x02,
+	NVME_LOG_FW_SLOT	= 0x03,
+	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
 	NVME_FWACT_REPL_ACTV	= (1 << 3),
 	NVME_FWACT_ACTV		= (2 << 3),
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e3fc8f0..5312fae 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -163,8 +163,9 @@
 	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
 	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
 	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -301,8 +302,8 @@
 				exclude_callchain_kernel : 1, /* exclude kernel callchains */
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
-
-				__reserved_1   : 40;
+				comm_exec      :  1, /* flag comm events that are due to an exec */
+				__reserved_1   : 39;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -501,7 +502,12 @@
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events, so they can reuse the same bit position.
+ */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
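
From userspace the new attr bit doubles as a feature probe, since older
kernels typically reject attributes with reserved bits set; a sketch:

	struct perf_event_attr attr = { 0 };

	attr.size = sizeof(attr);
	attr.comm = 1;
	attr.comm_exec = 1;	/* EINVAL from perf_event_open() on old kernels */
	/* exec()-triggered COMM records then carry
	 * PERF_RECORD_MISC_COMM_EXEC in header.misc */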
diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 5759810..21eed48 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -80,7 +80,7 @@
 struct snd_compr_avail {
 	__u64 avail;
 	struct snd_compr_tstamp tstamp;
-};
+} __attribute__((packed));
 
 enum snd_compr_direction {
 	SND_COMPRESS_PLAYBACK = 0,
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index d2b32ac..35536d9 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -223,3 +223,10 @@
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_RWLOCK
+	bool
+
+config QUEUE_RWLOCK
+	def_bool y if ARCH_USE_QUEUE_RWLOCK
+	depends on SMP
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 24d35cc..5fa58e4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2974,6 +2974,22 @@
 	local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];
+		if (!ctx)
+			continue;
+
+		perf_event_enable_on_exec(ctx);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -5075,21 +5091,9 @@
 		       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
 	struct perf_comm_event comm_event;
-	struct perf_event_context *ctx;
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = task->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
-	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
@@ -5101,7 +5105,7 @@
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_COMM,
-				.misc = 0,
+				.misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
 				/* .size */
 			},
 			/* .pid */
@@ -7122,6 +7126,13 @@
 		}
 	}
 
+	if (is_sampling_event(event)) {
+		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+			err = -ENOTSUPP;
+			goto err_alloc;
+		}
+	}
+
 	account_event(event);
 
 	/*
@@ -7433,7 +7444,7 @@
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event;
+	struct perf_event *child_event, *next;
 	struct perf_event_context *child_ctx;
 	unsigned long flags;
 
@@ -7487,7 +7498,7 @@
 	 */
 	mutex_lock(&child_ctx->mutex);
 
-	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
 	mutex_unlock(&child_ctx->mutex);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index adcd76a..c445e39 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -36,6 +36,7 @@
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
+#include <linux/shmem_fs.h>
 
 #include <linux/uprobes.h>
 
@@ -127,7 +128,7 @@
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
 
 	if (is_register)
 		flags |= VM_WRITE;
@@ -279,18 +280,13 @@
  * supported by that architecture then we need to modify is_trap_at_addr and
  * uprobe_write_opcode accordingly. This would never be a problem for archs
  * that have fixed length instructions.
- */
-
-/*
+ *
  * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
  *
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm).
- *
- * For mm @mm, write the opcode at @vaddr.
+ * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
 int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@
 	if (ret <= 0)
 		goto put_old;
 
+	ret = anon_vma_prepare(vma);
+	if (ret)
+		goto put_old;
+
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
 		goto put_old;
 
-	__SetPageUptodate(new_page);
+	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+		goto put_new;
 
+	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
-	ret = anon_vma_prepare(vma);
-	if (ret)
-		goto put_new;
-
 	ret = __replace_page(vma, vaddr, old_page, new_page);
+	if (ret)
+		mem_cgroup_uncharge_page(new_page);
 
 put_new:
 	page_cache_release(new_page);
@@ -537,14 +537,15 @@
 			void *insn, int nbytes, loff_t offset)
 {
 	struct page *page;
-
-	if (!mapping->a_ops->readpage)
-		return -EIO;
 	/*
-	 * Ensure that the page that has the original instruction is
-	 * populated and in page-cache.
+	 * Ensure that the page that has the original instruction is populated
+	 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+	 * see uprobe_register().
 	 */
-	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	if (mapping->a_ops->readpage)
+		page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+	else
+		page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
@@ -880,6 +881,9 @@
 	if (!uc->handler && !uc->ret_handler)
 		return -EINVAL;
 
+	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
+	if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+		return -EIO;
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -1361,6 +1365,16 @@
 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (unlikely(utask && utask->active_uprobe))
+		return utask->vaddr;
+
+	return instruction_pointer(regs);
+}
+
 /*
  * Called with no locks held.
  * Called in context of a exiting or a exec-ing thread.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ceeadfc..3214289 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -86,21 +86,8 @@
 	return &(kretprobe_table_locks[hash].lock);
 }
 
-/*
- * Normally, functions that we'd want to prohibit kprobes in, are marked
- * __kprobes. But, there are cases where such functions already belong to
- * a different section (__sched for preempt_schedule)
- *
- * For such cases, we now have a blacklist
- */
-static struct kprobe_blackpoint kprobe_blacklist[] = {
-	{"preempt_schedule",},
-	{"native_get_debugreg",},
-	{"irq_entries_start",},
-	{"common_interrupt",},
-	{"mcount",},	/* mcount can be called from everywhere */
-	{NULL}    /* Terminator */
-};
+/* Blacklist -- list of struct kprobe_blacklist_entry */
+static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
@@ -151,13 +138,13 @@
 	.insn_size = MAX_INSN_SIZE,
 	.nr_garbage = 0,
 };
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
+static int collect_garbage_slots(struct kprobe_insn_cache *c);
 
 /**
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip;
 	kprobe_opcode_t *slot = NULL;
@@ -214,7 +201,7 @@
 }
 
 /* Return 1 if all garbages are collected, otherwise 0. */
-static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
 	kip->slot_used[idx] = SLOT_CLEAN;
 	kip->nused--;
@@ -235,7 +222,7 @@
 	return 0;
 }
 
-static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip, *next;
 
@@ -257,8 +244,8 @@
 	return 0;
 }
 
-void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
-				kprobe_opcode_t *slot, int dirty)
+void __free_insn_slot(struct kprobe_insn_cache *c,
+		      kprobe_opcode_t *slot, int dirty)
 {
 	struct kprobe_insn_page *kip;
 
@@ -314,7 +301,7 @@
  * 				OR
  * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
  */
-struct kprobe __kprobes *get_kprobe(void *addr)
+struct kprobe *get_kprobe(void *addr)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -327,8 +314,9 @@
 
 	return NULL;
 }
+NOKPROBE_SYMBOL(get_kprobe);
 
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 /* Return true if the kprobe is an aggregator */
 static inline int kprobe_aggrprobe(struct kprobe *p)
@@ -360,7 +348,7 @@
  * Call all pre_handler on the list, but ignores its return value.
  * This must be called from arch-dep optimized caller.
  */
-void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp;
 
@@ -372,9 +360,10 @@
 		reset_kprobe_instance();
 	}
 }
+NOKPROBE_SYMBOL(opt_pre_handler);
 
 /* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -412,7 +401,7 @@
 }
 
 /* Return true(!0) if the probe is queued on (un)optimizing lists */
-static int __kprobes kprobe_queued(struct kprobe *p)
+static int kprobe_queued(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -428,7 +417,7 @@
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
  */
-static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+static struct kprobe *get_optimized_kprobe(unsigned long addr)
 {
 	int i;
 	struct kprobe *p = NULL;
@@ -460,7 +449,7 @@
  * Optimize (replace a breakpoint with a jump) kprobes listed on
  * optimizing_list.
  */
-static __kprobes void do_optimize_kprobes(void)
+static void do_optimize_kprobes(void)
 {
 	/* Optimization never be done when disarmed */
 	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
@@ -488,7 +477,7 @@
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
  * if need) kprobes listed on unoptimizing_list.
  */
-static __kprobes void do_unoptimize_kprobes(void)
+static void do_unoptimize_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -520,7 +509,7 @@
 }
 
 /* Reclaim all kprobes on the free_list */
-static __kprobes void do_free_cleaned_kprobes(void)
+static void do_free_cleaned_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -532,13 +521,13 @@
 }
 
 /* Start optimizer after OPTIMIZE_DELAY passed */
-static __kprobes void kick_kprobe_optimizer(void)
+static void kick_kprobe_optimizer(void)
 {
 	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
 }
 
 /* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+static void kprobe_optimizer(struct work_struct *work)
 {
 	mutex_lock(&kprobe_mutex);
 	/* Lock modules while optimizing kprobes */
@@ -574,7 +563,7 @@
 }
 
 /* Wait for completing optimization and unoptimization */
-static __kprobes void wait_for_kprobe_optimizer(void)
+static void wait_for_kprobe_optimizer(void)
 {
 	mutex_lock(&kprobe_mutex);
 
@@ -593,7 +582,7 @@
 }
 
 /* Optimize kprobe if p is ready to be optimized */
-static __kprobes void optimize_kprobe(struct kprobe *p)
+static void optimize_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -627,7 +616,7 @@
 }
 
 /* Short cut to direct unoptimizing */
-static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	get_online_cpus();
 	arch_unoptimize_kprobe(op);
@@ -637,7 +626,7 @@
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
+static void unoptimize_kprobe(struct kprobe *p, bool force)
 {
 	struct optimized_kprobe *op;
 
@@ -697,7 +686,7 @@
 }
 
 /* Remove optimized instructions */
-static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+static void kill_optimized_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -723,7 +712,7 @@
 }
 
 /* Try to prepare optimized instructions */
-static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+static void prepare_optimized_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -732,7 +721,7 @@
 }
 
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -747,13 +736,13 @@
 	return &op->kp;
 }
 
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
 
 /*
  * Prepare an optimized_kprobe and optimize it
  * NOTE: p must be a normal registered kprobe
  */
-static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+static void try_to_optimize_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap;
 	struct optimized_kprobe *op;
@@ -787,7 +776,7 @@
 }
 
 #ifdef CONFIG_SYSCTL
-static void __kprobes optimize_all_kprobes(void)
+static void optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -810,7 +799,7 @@
 	mutex_unlock(&kprobe_mutex);
 }
 
-static void __kprobes unoptimize_all_kprobes(void)
+static void unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -861,7 +850,7 @@
 #endif /* CONFIG_SYSCTL */
 
 /* Put a breakpoint for a probe. Must be called with text_mutex locked */
-static void __kprobes __arm_kprobe(struct kprobe *p)
+static void __arm_kprobe(struct kprobe *p)
 {
 	struct kprobe *_p;
 
@@ -876,7 +865,7 @@
 }
 
 /* Remove the breakpoint of a probe. Must be called with text_mutex locked */
-static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
+static void __disarm_kprobe(struct kprobe *p, bool reopt)
 {
 	struct kprobe *_p;
 
@@ -911,13 +900,13 @@
 	BUG_ON(kprobe_unused(ap));
 }
 
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
+static void free_aggr_kprobe(struct kprobe *p)
 {
 	arch_remove_kprobe(p);
 	kfree(p);
 }
 
-static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
 	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
 }
@@ -931,7 +920,7 @@
 static int kprobe_ftrace_enabled;
 
 /* Must ensure p->addr is really on ftrace */
-static int __kprobes prepare_kprobe(struct kprobe *p)
+static int prepare_kprobe(struct kprobe *p)
 {
 	if (!kprobe_ftrace(p))
 		return arch_prepare_kprobe(p);
@@ -940,7 +929,7 @@
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
+static void arm_kprobe_ftrace(struct kprobe *p)
 {
 	int ret;
 
@@ -955,7 +944,7 @@
 }
 
 /* Caller must lock kprobe_mutex */
-static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
+static void disarm_kprobe_ftrace(struct kprobe *p)
 {
 	int ret;
 
@@ -975,7 +964,7 @@
 #endif
 
 /* Arm a kprobe with text_mutex */
-static void __kprobes arm_kprobe(struct kprobe *kp)
+static void arm_kprobe(struct kprobe *kp)
 {
 	if (unlikely(kprobe_ftrace(kp))) {
 		arm_kprobe_ftrace(kp);
@@ -992,7 +981,7 @@
 }
 
 /* Disarm a kprobe with text_mutex */
-static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
+static void disarm_kprobe(struct kprobe *kp, bool reopt)
 {
 	if (unlikely(kprobe_ftrace(kp))) {
 		disarm_kprobe_ftrace(kp);
@@ -1008,7 +997,7 @@
  * Aggregate handlers for multiple kprobes support - these handlers
  * take care of invoking the individual kprobe handlers on p->list
  */
-static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp;
 
@@ -1022,9 +1011,10 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(aggr_pre_handler);
 
-static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
-					unsigned long flags)
+static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+			      unsigned long flags)
 {
 	struct kprobe *kp;
 
@@ -1036,9 +1026,10 @@
 		}
 	}
 }
+NOKPROBE_SYMBOL(aggr_post_handler);
 
-static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
-					int trapnr)
+static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+			      int trapnr)
 {
 	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 
@@ -1052,8 +1043,9 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(aggr_fault_handler);
 
-static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 	int ret = 0;
@@ -1065,9 +1057,10 @@
 	reset_kprobe_instance();
 	return ret;
 }
+NOKPROBE_SYMBOL(aggr_break_handler);
 
 /* Walks the list and increments nmissed count for multiprobe case */
-void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
+void kprobes_inc_nmissed_count(struct kprobe *p)
 {
 	struct kprobe *kp;
 	if (!kprobe_aggrprobe(p)) {
@@ -1078,9 +1071,10 @@
 	}
 	return;
 }
+NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
-				struct hlist_head *head)
+void recycle_rp_inst(struct kretprobe_instance *ri,
+		     struct hlist_head *head)
 {
 	struct kretprobe *rp = ri->rp;
 
@@ -1095,8 +1089,9 @@
 		/* Unregistering */
 		hlist_add_head(&ri->hlist, head);
 }
+NOKPROBE_SYMBOL(recycle_rp_inst);
 
-void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
+void kretprobe_hash_lock(struct task_struct *tsk,
 			 struct hlist_head **head, unsigned long *flags)
 __acquires(hlist_lock)
 {
@@ -1107,17 +1102,19 @@
 	hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_lock);
 
-static void __kprobes kretprobe_table_lock(unsigned long hash,
-	unsigned long *flags)
+static void kretprobe_table_lock(unsigned long hash,
+				 unsigned long *flags)
 __acquires(hlist_lock)
 {
 	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_lock_irqsave(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_lock);
 
-void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
-	unsigned long *flags)
+void kretprobe_hash_unlock(struct task_struct *tsk,
+			   unsigned long *flags)
 __releases(hlist_lock)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -1126,14 +1123,16 @@
 	hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_hash_unlock);
 
-static void __kprobes kretprobe_table_unlock(unsigned long hash,
-       unsigned long *flags)
+static void kretprobe_table_unlock(unsigned long hash,
+				   unsigned long *flags)
 __releases(hlist_lock)
 {
 	raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	raw_spin_unlock_irqrestore(hlist_lock, *flags);
 }
+NOKPROBE_SYMBOL(kretprobe_table_unlock);
 
 /*
  * This function is called from finish_task_switch when task tk becomes dead,
@@ -1141,7 +1140,7 @@
  * with this task. These left over instances represent probed functions
  * that have been called but will never return.
  */
-void __kprobes kprobe_flush_task(struct task_struct *tk)
+void kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
@@ -1166,6 +1165,7 @@
 		kfree(ri);
 	}
 }
+NOKPROBE_SYMBOL(kprobe_flush_task);
 
 static inline void free_rp_inst(struct kretprobe *rp)
 {
@@ -1178,7 +1178,7 @@
 	}
 }
 
-static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+static void cleanup_rp_inst(struct kretprobe *rp)
 {
 	unsigned long flags, hash;
 	struct kretprobe_instance *ri;
@@ -1197,12 +1197,13 @@
 	}
 	free_rp_inst(rp);
 }
+NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /*
 * Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
-static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
+static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
@@ -1226,7 +1227,7 @@
  * Fill in the required fields of the "manager kprobe". Replace the
  * earlier kprobe in the hlist with the manager kprobe
  */
-static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	/* Copy p's insn slot to ap */
 	copy_kprobe(p, ap);
@@ -1252,8 +1253,7 @@
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
-					  struct kprobe *p)
+static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 {
 	int ret = 0;
 	struct kprobe *ap = orig_p;
@@ -1324,25 +1324,29 @@
 	return ret;
 }
 
-static int __kprobes in_kprobes_functions(unsigned long addr)
+bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 {
-	struct kprobe_blackpoint *kb;
+	/* The __kprobes marked functions and entry code must not be probed */
+	return addr >= (unsigned long)__kprobes_text_start &&
+	       addr < (unsigned long)__kprobes_text_end;
+}
 
-	if (addr >= (unsigned long)__kprobes_text_start &&
-	    addr < (unsigned long)__kprobes_text_end)
-		return -EINVAL;
+static bool within_kprobe_blacklist(unsigned long addr)
+{
+	struct kprobe_blacklist_entry *ent;
+
+	if (arch_within_kprobe_blacklist(addr))
+		return true;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
 	 * fail any probe registration in the prohibited area
 	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		if (kb->start_addr) {
-			if (addr >= kb->start_addr &&
-			    addr < (kb->start_addr + kb->range))
-				return -EINVAL;
-		}
+	list_for_each_entry(ent, &kprobe_blacklist, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return true;
 	}
-	return 0;
+
+	return false;
 }
 
 /*
@@ -1351,7 +1355,7 @@
  * This returns encoded errors if it fails to look up symbol or invalid
  * combination of parameters.
  */
-static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
+static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
 {
 	kprobe_opcode_t *addr = p->addr;
 
@@ -1374,7 +1378,7 @@
 }
 
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap, *list_p;
 
@@ -1406,8 +1410,8 @@
 	return ret;
 }
 
-static __kprobes int check_kprobe_address_safe(struct kprobe *p,
-					       struct module **probed_mod)
+static int check_kprobe_address_safe(struct kprobe *p,
+				     struct module **probed_mod)
 {
 	int ret = 0;
 	unsigned long ftrace_addr;
@@ -1433,7 +1437,7 @@
 
 	/* Ensure it is not in reserved area nor out of text */
 	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr) ||
+	    within_kprobe_blacklist((unsigned long) p->addr) ||
 	    jump_label_text_reserved(p->addr, p->addr)) {
 		ret = -EINVAL;
 		goto out;
@@ -1469,7 +1473,7 @@
 	return ret;
 }
 
-int __kprobes register_kprobe(struct kprobe *p)
+int register_kprobe(struct kprobe *p)
 {
 	int ret;
 	struct kprobe *old_p;
@@ -1531,7 +1535,7 @@
 EXPORT_SYMBOL_GPL(register_kprobe);
 
 /* Check if all probes on the aggrprobe are disabled */
-static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+static int aggr_kprobe_disabled(struct kprobe *ap)
 {
 	struct kprobe *kp;
 
@@ -1547,7 +1551,7 @@
 }
 
 /* Disable one kprobe: Make sure called under kprobe_mutex is locked */
-static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
 	struct kprobe *orig_p;
 
@@ -1574,7 +1578,7 @@
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+static int __unregister_kprobe_top(struct kprobe *p)
 {
 	struct kprobe *ap, *list_p;
 
@@ -1631,7 +1635,7 @@
 	return 0;
 }
 
-static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+static void __unregister_kprobe_bottom(struct kprobe *p)
 {
 	struct kprobe *ap;
 
@@ -1647,7 +1651,7 @@
 	/* Otherwise, do nothing. */
 }
 
-int __kprobes register_kprobes(struct kprobe **kps, int num)
+int register_kprobes(struct kprobe **kps, int num)
 {
 	int i, ret = 0;
 
@@ -1665,13 +1669,13 @@
 }
 EXPORT_SYMBOL_GPL(register_kprobes);
 
-void __kprobes unregister_kprobe(struct kprobe *p)
+void unregister_kprobe(struct kprobe *p)
 {
 	unregister_kprobes(&p, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kprobe);
 
-void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+void unregister_kprobes(struct kprobe **kps, int num)
 {
 	int i;
 
@@ -1700,7 +1704,7 @@
 	return (unsigned long)entry;
 }
 
-int __kprobes register_jprobes(struct jprobe **jps, int num)
+int register_jprobes(struct jprobe **jps, int num)
 {
 	struct jprobe *jp;
 	int ret = 0, i;
@@ -1731,19 +1735,19 @@
 }
 EXPORT_SYMBOL_GPL(register_jprobes);
 
-int __kprobes register_jprobe(struct jprobe *jp)
+int register_jprobe(struct jprobe *jp)
 {
 	return register_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(register_jprobe);
 
-void __kprobes unregister_jprobe(struct jprobe *jp)
+void unregister_jprobe(struct jprobe *jp)
 {
 	unregister_jprobes(&jp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_jprobe);
 
-void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+void unregister_jprobes(struct jprobe **jps, int num)
 {
 	int i;
 
@@ -1768,8 +1772,7 @@
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
  */
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
 	unsigned long hash, flags = 0;
@@ -1807,8 +1810,9 @@
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -1861,7 +1865,7 @@
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num)
 {
 	int ret = 0, i;
 
@@ -1879,13 +1883,13 @@
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)
 {
 	unregister_kretprobes(&rp, 1);
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num)
 {
 	int i;
 
@@ -1908,38 +1912,38 @@
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
 #else /* CONFIG_KRETPROBES */
-int __kprobes register_kretprobe(struct kretprobe *rp)
+int register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+int register_kretprobes(struct kretprobe **rps, int num)
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
-void __kprobes unregister_kretprobe(struct kretprobe *rp)
+void unregister_kretprobe(struct kretprobe *rp)
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
-void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+void unregister_kretprobes(struct kretprobe **rps, int num)
 {
 }
 EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-					   struct pt_regs *regs)
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	return 0;
 }
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
 #endif /* CONFIG_KRETPROBES */
 
 /* Set the kprobe gone and remove its instruction buffer. */
-static void __kprobes kill_kprobe(struct kprobe *p)
+static void kill_kprobe(struct kprobe *p)
 {
 	struct kprobe *kp;
 
@@ -1963,7 +1967,7 @@
 }
 
 /* Disable one kprobe */
-int __kprobes disable_kprobe(struct kprobe *kp)
+int disable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
 
@@ -1979,7 +1983,7 @@
 EXPORT_SYMBOL_GPL(disable_kprobe);
 
 /* Enable one kprobe */
-int __kprobes enable_kprobe(struct kprobe *kp)
+int enable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
 	struct kprobe *p;
@@ -2012,16 +2016,49 @@
 }
 EXPORT_SYMBOL_GPL(enable_kprobe);
 
-void __kprobes dump_kprobe(struct kprobe *kp)
+void dump_kprobe(struct kprobe *kp)
 {
 	printk(KERN_WARNING "Dumping kprobe:\n");
 	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
 	       kp->symbol_name, kp->addr, kp->offset);
 }
+NOKPROBE_SYMBOL(dump_kprobe);
+
+/*
+ * Look up and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we need to determine the range of
+ * addresses that belong to said functions, since a kprobe need not
+ * be at the beginning of a function.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+					     unsigned long *end)
+{
+	unsigned long *iter;
+	struct kprobe_blacklist_entry *ent;
+	unsigned long offset = 0, size = 0;
+
+	for (iter = start; iter < end; iter++) {
+		if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
+			pr_err("Failed to find blacklist %p\n", (void *)*iter);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+		ent->start_addr = *iter;
+		ent->end_addr = *iter + size;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_blacklist);
+	}
+	return 0;
+}
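Each entry spans the whole function body, so deciding whether an address may
be probed reduces to a range comparison over the list. A sketch of such a
lookup (helper name assumed; the real check lives elsewhere in kprobes.c):

	static bool within_kprobe_blacklist(unsigned long addr)
	{
		struct kprobe_blacklist_entry *ent;

		list_for_each_entry(ent, &kprobe_blacklist, list) {
			if (addr >= ent->start_addr && addr < ent->end_addr)
				return true;
		}
		return false;
	}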
 
 /* Module notifier call back, checking kprobes on the module */
-static int __kprobes kprobes_module_callback(struct notifier_block *nb,
-					     unsigned long val, void *data)
+static int kprobes_module_callback(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct module *mod = data;
 	struct hlist_head *head;
@@ -2062,14 +2099,13 @@
 	.priority = 0
 };
 
+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
-	unsigned long offset = 0, size = 0;
-	char *modname, namebuf[KSYM_NAME_LEN];
-	const char *symbol_name;
-	void *addr;
-	struct kprobe_blackpoint *kb;
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
@@ -2079,26 +2115,11 @@
 		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
-	/*
-	 * Lookup and populate the kprobe_blacklist.
-	 *
-	 * Unlike the kretprobe blacklist, we'll need to determine
-	 * the range of addresses that belong to the said functions,
-	 * since a kprobe need not necessarily be at the beginning
-	 * of a function.
-	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		kprobe_lookup_name(kb->name, addr);
-		if (!addr)
-			continue;
-
-		kb->start_addr = (unsigned long)addr;
-		symbol_name = kallsyms_lookup(kb->start_addr,
-				&size, &offset, &modname, namebuf);
-		if (!symbol_name)
-			kb->range = 0;
-		else
-			kb->range = size;
+	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+					__stop_kprobe_blacklist);
+	if (err) {
+		pr_err("kprobes: failed to populate blacklist: %d\n", err);
+		pr_err("Please take care of using kprobes.\n");
 	}
 
 	if (kretprobe_blacklist_size) {
@@ -2138,7 +2159,7 @@
 }
 
 #ifdef CONFIG_DEBUG_FS
-static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
+static void report_probe(struct seq_file *pi, struct kprobe *p,
 		const char *sym, int offset, char *modname, struct kprobe *pp)
 {
 	char *kprobe_type;
@@ -2167,12 +2188,12 @@
 		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
 }
 
-static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
+static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
 {
 	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
 }
 
-static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
+static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
 	if (*pos >= KPROBE_TABLE_SIZE)
@@ -2180,12 +2201,12 @@
 	return pos;
 }
 
-static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
+static void kprobe_seq_stop(struct seq_file *f, void *v)
 {
 	/* Nothing to do */
 }
 
-static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
+static int show_kprobe_addr(struct seq_file *pi, void *v)
 {
 	struct hlist_head *head;
 	struct kprobe *p, *kp;
@@ -2216,7 +2237,7 @@
 	.show  = show_kprobe_addr
 };
 
-static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
+static int kprobes_open(struct inode *inode, struct file *filp)
 {
 	return seq_open(filp, &kprobes_seq_ops);
 }
@@ -2228,7 +2249,47 @@
 	.release        = seq_release,
 };
 
-static void __kprobes arm_all_kprobes(void)
+/* kprobes/blacklist -- shows which functions cannot be probed */
+static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
+{
+	return seq_list_start(&kprobe_blacklist, *pos);
+}
+
+static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &kprobe_blacklist, pos);
+}
+
+static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
+{
+	struct kprobe_blacklist_entry *ent =
+		list_entry(v, struct kprobe_blacklist_entry, list);
+
+	seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
+		   (void *)ent->end_addr, (void *)ent->start_addr);
+	return 0;
+}
+
+static const struct seq_operations kprobe_blacklist_seq_ops = {
+	.start = kprobe_blacklist_seq_start,
+	.next  = kprobe_blacklist_seq_next,
+	.stop  = kprobe_seq_stop,	/* Reuse void function */
+	.show  = kprobe_blacklist_seq_show,
+};
+
+static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &kprobe_blacklist_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_blacklist_ops = {
+	.open           = kprobe_blacklist_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
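Once debugfs_kprobe_init() below exposes this as kprobes/blacklist, each line
of the file is one address range and the symbol it covers, following the
seq_printf() format above. Illustrative output (addresses made up):

	0xffffffff81039b60-0xffffffff81039c2d	do_int3
	0xffffffff815f1380-0xffffffff815f14a9	do_page_fault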
+
+static void arm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -2256,7 +2317,7 @@
 	return;
 }
 
-static void __kprobes disarm_all_kprobes(void)
+static void disarm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
@@ -2340,7 +2401,7 @@
 	.llseek =	default_llseek,
 };
 
-static int __kprobes debugfs_kprobe_init(void)
+static int __init debugfs_kprobe_init(void)
 {
 	struct dentry *dir, *file;
 	unsigned int value = 1;
@@ -2351,19 +2412,24 @@
 
 	file = debugfs_create_file("list", 0444, dir, NULL,
 				&debugfs_kprobes_operations);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;
 
 	file = debugfs_create_file("enabled", 0600, dir,
 					&value, &fops_kp);
-	if (!file) {
-		debugfs_remove(dir);
-		return -ENOMEM;
-	}
+	if (!file)
+		goto error;
+
+	file = debugfs_create_file("blacklist", 0444, dir, NULL,
+				&debugfs_kprobe_blacklist_ops);
+	if (!file)
+		goto error;
 
 	return 0;
+
+error:
+	debugfs_remove(dir);
+	return -ENOMEM;
 }
 
 late_initcall(debugfs_kprobe_init);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4..8541bfd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,4 +24,5 @@
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
+obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 0000000..fb5b8ac
--- /dev/null
+++ b/kernel/locking/qrwlock.c
@@ -0,0 +1,133 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/qrwlock.h>
+
+/**
+ * rspin_until_writer_unlock - spin until the writer releases the lock
+ * @lock: Pointer to queue rwlock structure
+ * @cnts: Current queue rwlock writer status byte
+ *
+ * In interrupt context or at the head of the queue, the reader will just
+ * increment the reader count & wait until the writer releases the lock.
+ */
+static __always_inline void
+rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
+{
+	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
+		arch_mutex_cpu_relax();
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+	}
+}
+
+/**
+ * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+void queue_read_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/*
+	 * Readers come here when they cannot get the lock without waiting
+	 */
+	if (unlikely(in_interrupt())) {
+		/*
+		 * Readers in interrupt context will spin until the lock is
+		 * available without waiting in the queue.
+		 */
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		rspin_until_writer_unlock(lock, cnts);
+		return;
+	}
+	atomic_sub(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Put the reader into the wait queue
+	 */
+	arch_spin_lock(&lock->lock);
+
+	/*
+	 * At the head of the wait queue now, wait until the writer state
+	 * goes to 0 and then try to increment the reader count and get
+	 * the lock. It is possible that an incoming writer may steal the
+	 * lock in the interim, so it is necessary to check the writer byte
+	 * to make sure that the write lock isn't taken.
+	 */
+	while (atomic_read(&lock->cnts) & _QW_WMASK)
+		arch_mutex_cpu_relax();
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	rspin_until_writer_unlock(lock, cnts);
+
+	/*
+	 * Signal the next one in queue to become queue head
+	 */
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_read_lock_slowpath);
+
+/**
+ * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+void queue_write_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&lock->lock);
+
+	/* Try to acquire the lock directly if no reader is present */
+	if (!atomic_read(&lock->cnts) &&
+	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+		goto unlock;
+
+	/*
+	 * Set the waiting flag to notify readers that a writer is pending,
+	 * or wait for a previous writer to go away.
+	 */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if (!(cnts & _QW_WMASK) &&
+		    (atomic_cmpxchg(&lock->cnts, cnts,
+				    cnts | _QW_WAITING) == cnts))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+
+	/* When no more readers, set the locked flag */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if ((cnts == _QW_WAITING) &&
+		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
+				    _QW_LOCKED) == _QW_WAITING))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+unlock:
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_write_lock_slowpath);
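Both slowpaths operate on a single 32-bit cnts word: the low byte holds the
writer state (_QW_WAITING = 0x01 while queued, _QW_LOCKED = 0xff once owned,
masked by _QW_WMASK = 0xff) and the remaining bits count readers in units of
_QR_BIAS = 1 << 8. The fast paths live in the qrwlock header; a sketch of the
reader side, assuming that layout:

	static inline void queue_read_lock(struct qrwlock *lock)
	{
		u32 cnts;

		/* Optimistically bump the reader count. */
		cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
		if (likely(!(cnts & _QW_WMASK)))
			return;

		/* A writer is active or waiting; fall back to the queue. */
		queue_read_lock_slowpath(lock);
	}

This is why queue_read_lock_slowpath() starts with atomic_sub(_QR_BIAS, ...):
a non-interrupt reader first undoes the optimistic increment before queueing
on lock->lock.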
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff..dacc321 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@
 	return sem;
 }
 
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return false;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the rwsem owner may have
+	 * just acquired it and not set the owner yet or the rwsem
+	 * has been released.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu dereference _after_ checking
+	 * that sem->owner still matches owner. If that fails, owner might
+	 * point to free()d memory; if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out of the loop above on need_resched() or when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/* sem->wait_lock should not be held when doing optimistic spinning */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/* wait_lock will be acquired if write_lock is obtained */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted the lock
+		 * holder between it acquiring the lock and setting the owner
+		 * field. If we're an RT task, that will live-lock because we
+		 * won't let the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are
+		 * no active writers, the lock must be read owned; so we try to
+		 * wake any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff143..42f806d 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
 
 #include <linux/atomic.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
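These helpers pair with the lock and unlock entry points below so that
optimistic spinners always see a valid owner while the write lock is held.
An illustrative caller (not part of this patch):

	static void rwsem_owner_example(struct rw_semaphore *sem)
	{
		down_write(sem);	/* acquire, then rwsem_set_owner() */
		/* spinners may now poll sem->owner->on_cpu */
		up_write(sem);		/* rwsem_clear_owner(), then release */
	}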
+
 /*
  * lock for reading
  */
@@ -48,6 +69,7 @@
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
 
+	rwsem_clear_owner(sem);
 	__up_write(sem);
 }
 
@@ -99,6 +125,7 @@
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
+	rwsem_clear_owner(sem);
 	__downgrade_write(sem);
 }
 
@@ -122,6 +149,7 @@
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write_nested);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index db4c8b0..4803da6 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -71,9 +71,9 @@
  *	@returns:	notifier_call_chain returns the value returned by the
  *			last notifier function called.
  */
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-					unsigned long val, void *v,
-					int nr_to_call,	int *nr_calls)
+static int notifier_call_chain(struct notifier_block **nl,
+			       unsigned long val, void *v,
+			       int nr_to_call, int *nr_calls)
 {
 	int ret = NOTIFY_DONE;
 	struct notifier_block *nb, *next_nb;
@@ -102,6 +102,7 @@
 	}
 	return ret;
 }
+NOKPROBE_SYMBOL(notifier_call_chain);
 
 /*
  *	Atomic notifier chain routines.  Registration and unregistration
@@ -172,9 +173,9 @@
  *	Otherwise the return value is the return value
  *	of the last notifier function called.
  */
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-					unsigned long val, void *v,
-					int nr_to_call, int *nr_calls)
+int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+				 unsigned long val, void *v,
+				 int nr_to_call, int *nr_calls)
 {
 	int ret;
 
@@ -184,13 +185,15 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(__atomic_notifier_call_chain);
 
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-		unsigned long val, void *v)
+int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+			       unsigned long val, void *v)
 {
 	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
 }
 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+NOKPROBE_SYMBOL(atomic_notifier_call_chain);
 
 /*
  *	Blocking notifier chain routines.  All access to the chain is
@@ -527,7 +530,7 @@
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notrace __kprobes notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
@@ -540,6 +543,7 @@
 	};
 	return atomic_notifier_call_chain(&die_chain, val, &args);
 }
+NOKPROBE_SYMBOL(notify_die);
 
 int register_die_notifier(struct notifier_block *nb)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6b9879..3bdf01b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -535,7 +535,7 @@
  	__old;								\
 })
 
-#ifdef TIF_POLLING_NRFLAG
+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
  * this avoids any races wrt polling state changes and thereby avoids
@@ -546,12 +546,44 @@
 	struct thread_info *ti = task_thread_info(p);
 	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
 }
+
+/*
+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set.
+ *
+ * If this returns true, then the idle task promises to call
+ * sched_ttwu_pending() and reschedule soon.
+ */
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+
+	for (;;) {
+		if (!(val & _TIF_POLLING_NRFLAG))
+			return false;
+		if (val & _TIF_NEED_RESCHED)
+			return true;
+		old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
+		if (old == val)
+			break;
+		val = old;
+	}
+	return true;
+}
+
 #else
 static bool set_nr_and_not_polling(struct task_struct *p)
 {
 	set_tsk_need_resched(p);
 	return true;
 }
+
+#ifdef CONFIG_SMP
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	return false;
+}
+#endif
 #endif
 
 /*
@@ -580,6 +612,8 @@
 
 	if (set_nr_and_not_polling(p))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 void resched_cpu(int cpu)
@@ -642,27 +676,10 @@
 	if (cpu == smp_processor_id())
 		return;
 
-	/*
-	 * This is safe, as this function is called with the timer
-	 * wheel base lock of (cpu) held. When the CPU is on the way
-	 * to idle and has not yet set rq->curr to idle then it will
-	 * be serialized on the timer wheel base lock and take the new
-	 * timer into account automatically.
-	 */
-	if (rq->curr != rq->idle)
-		return;
-
-	/*
-	 * We can set TIF_RESCHED on the idle task of the other CPU
-	 * lockless. The worst case is that the other CPU runs the
-	 * idle task through an additional NOOP schedule()
-	 */
-	set_tsk_need_resched(rq->idle);
-
-	/* NEED_RESCHED must be visible before we test polling */
-	smp_mb();
-	if (!tsk_is_polling(rq->idle))
+	if (set_nr_and_not_polling(rq->idle))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 static bool wake_up_full_nohz_cpu(int cpu)
@@ -888,7 +905,7 @@
 	rq->clock_task += delta;
 
 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		sched_rt_avg_update(rq, irq_delta + steal);
 #endif
 }
@@ -1521,13 +1538,17 @@
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+void sched_ttwu_pending(void)
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
 	struct task_struct *p;
+	unsigned long flags;
 
-	raw_spin_lock(&rq->lock);
+	if (!llist)
+		return;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	while (llist) {
 		p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1535,7 +1556,7 @@
 		ttwu_do_activate(rq, p, 0);
 	}
 
-	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 void scheduler_ipi(void)
@@ -1581,8 +1602,14 @@
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
 {
-	if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
-		smp_send_reschedule(cpu);
+	struct rq *rq = cpu_rq(cpu);
+
+	if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
+		if (!set_nr_if_polling(rq->idle))
+			smp_send_reschedule(cpu);
+		else
+			trace_sched_wake_idle_without_ipi(cpu);
+	}
 }
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
@@ -2527,7 +2554,7 @@
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
-void __kprobes preempt_count_add(int val)
+void preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2553,8 +2580,9 @@
 	}
 }
 EXPORT_SYMBOL(preempt_count_add);
+NOKPROBE_SYMBOL(preempt_count_add);
 
-void __kprobes preempt_count_sub(int val)
+void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
@@ -2575,6 +2603,7 @@
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
+NOKPROBE_SYMBOL(preempt_count_sub);
 
 #endif
 
@@ -2857,6 +2886,7 @@
 		barrier();
 	} while (need_resched());
 }
+NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif /* CONFIG_PREEMPT */
 
@@ -4216,7 +4246,7 @@
  *	false (0) if we failed to boost the target.
  *	-ESRCH if there's no task to yield to.
  */
-bool __sched yield_to(struct task_struct *p, bool preempt)
+int __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
@@ -5242,14 +5272,13 @@
 		}
 
 		/*
-		 * Even though we initialize ->power to something semi-sane,
-		 * we leave power_orig unset. This allows us to detect if
+		 * Even though we initialize ->capacity to something semi-sane,
+		 * we leave capacity_orig unset. This allows us to detect if
 		 * domain iteration is still funny without causing /0 traps.
 		 */
-		if (!group->sgp->power_orig) {
+		if (!group->sgc->capacity_orig) {
 			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_power not "
-					"set\n");
+			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 			break;
 		}
 
@@ -5271,9 +5300,9 @@
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgp->power != SCHED_POWER_SCALE) {
-			printk(KERN_CONT " (cpu_power = %d)",
-				group->sgp->power);
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
+			printk(KERN_CONT " (cpu_capacity = %d)",
+				group->sgc->capacity);
 		}
 
 		group = group->next;
@@ -5331,7 +5360,7 @@
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
-			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
 			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
@@ -5362,7 +5391,7 @@
 				SD_BALANCE_NEWIDLE |
 				SD_BALANCE_FORK |
 				SD_BALANCE_EXEC |
-				SD_SHARE_CPUPOWER |
+				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
 				SD_SHARE_POWERDOMAIN);
@@ -5487,7 +5516,7 @@
 	return rd;
 }
 
-static void free_sched_groups(struct sched_group *sg, int free_sgp)
+static void free_sched_groups(struct sched_group *sg, int free_sgc)
 {
 	struct sched_group *tmp, *first;
 
@@ -5498,8 +5527,8 @@
 	do {
 		tmp = sg->next;
 
-		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
-			kfree(sg->sgp);
+		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
+			kfree(sg->sgc);
 
 		kfree(sg);
 		sg = tmp;
@@ -5517,7 +5546,7 @@
 	if (sd->flags & SD_OVERLAP) {
 		free_sched_groups(sd->groups, 1);
 	} else if (atomic_dec_and_test(&sd->groups->ref)) {
-		kfree(sd->groups->sgp);
+		kfree(sd->groups->sgc);
 		kfree(sd->groups);
 	}
 	kfree(sd);
@@ -5728,17 +5757,17 @@
 
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		if (atomic_inc_return(&sg->sgp->ref) == 1)
+		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
+		if (atomic_inc_return(&sg->sgc->ref) == 1)
 			build_group_mask(sd, sg);
 
 		/*
-		 * Initialize sgp->power such that even if we mess up the
+		 * Initialize sgc->capacity such that even if we mess up the
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
-		sg->sgp->power_orig = sg->sgp->power;
+		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -5776,8 +5805,8 @@
 
 	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
-		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+		atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
 	}
 
 	return cpu;
@@ -5786,7 +5815,7 @@
 /*
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
+ * and ->cpu_capacity to 0.
  *
  * Assumes the sched_domain tree is fully constructed
  */
@@ -5840,16 +5869,16 @@
 }
 
 /*
- * Initialize sched groups cpu_power.
+ * Initialize sched groups cpu_capacity.
  *
- * cpu_power indicates the capacity of sched group, which is used while
+ * cpu_capacity indicates the capacity of sched group, which is used while
  * distributing the load between different sched groups in a sched domain.
- * Typically cpu_power for all the groups in a sched domain will be same unless
- * there are asymmetries in the topology. If there are asymmetries, group
- * having more cpu_power will pickup more load compared to the group having
- * less cpu_power.
+ * Typically cpu_capacity for all the groups in a sched domain will be same
+ * unless there are asymmetries in the topology. If there are asymmetries,
+ * group having more cpu_capacity will pickup more load compared to the
+ * group having less cpu_capacity.
  */
-static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
 
@@ -5863,8 +5892,8 @@
 	if (cpu != group_balance_cpu(sg))
 		return;
 
-	update_group_power(sd, cpu);
-	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
+	update_group_capacity(sd, cpu);
+	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -5955,8 +5984,8 @@
 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
-	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
-		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
+		*per_cpu_ptr(sdd->sgc, cpu) = NULL;
 }
 
 #ifdef CONFIG_NUMA
@@ -5969,7 +5998,7 @@
 /*
  * SD_flags allowed in topology descriptions.
  *
- * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_CPUCAPACITY   - describes SMT topologies
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
  * SD_SHARE_POWERDOMAIN   - describes shared power domain
@@ -5978,7 +6007,7 @@
  * SD_ASYM_PACKING        - describes SMT quirks
  */
 #define TOPOLOGY_SD_FLAGS		\
-	(SD_SHARE_CPUPOWER |		\
+	(SD_SHARE_CPUCAPACITY |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
@@ -6024,7 +6053,7 @@
 					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
 					| 1*SD_WAKE_AFFINE
-					| 0*SD_SHARE_CPUPOWER
+					| 0*SD_SHARE_CPUCAPACITY
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6046,7 +6075,7 @@
 	 * Convert topological properties into behaviour.
 	 */
 
-	if (sd->flags & SD_SHARE_CPUPOWER) {
+	if (sd->flags & SD_SHARE_CPUCAPACITY) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
@@ -6358,14 +6387,14 @@
 		if (!sdd->sg)
 			return -ENOMEM;
 
-		sdd->sgp = alloc_percpu(struct sched_group_power *);
-		if (!sdd->sgp)
+		sdd->sgc = alloc_percpu(struct sched_group_capacity *);
+		if (!sdd->sgc)
 			return -ENOMEM;
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -6383,12 +6412,12 @@
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
+			sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
-			if (!sgp)
+			if (!sgc)
 				return -ENOMEM;
 
-			*per_cpu_ptr(sdd->sgp, j) = sgp;
+			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
 
@@ -6415,15 +6444,15 @@
 
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
-			if (sdd->sgp)
-				kfree(*per_cpu_ptr(sdd->sgp, j));
+			if (sdd->sgc)
+				kfree(*per_cpu_ptr(sdd->sgc, j));
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
-		free_percpu(sdd->sgp);
-		sdd->sgp = NULL;
+		free_percpu(sdd->sgc);
+		sdd->sgc = NULL;
 	}
 }
 
@@ -6493,14 +6522,14 @@
 		}
 	}
 
-	/* Calculate CPU power for physical packages and nodes */
+	/* Calculate CPU capacity for physical packages and nodes */
 	for (i = nr_cpumask_bits-1; i >= 0; i--) {
 		if (!cpumask_test_cpu(i, cpu_map))
 			continue;
 
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			claim_allocations(i, sd);
-			init_sched_groups_power(i, sd);
+			init_sched_groups_capacity(i, sd);
 		}
 	}
 
@@ -6943,7 +6972,7 @@
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_power = SCHED_POWER_SCALE;
+		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2b8cbf0..fc4f98b1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -57,8 +57,6 @@
 	dl_b->dl_runtime = runtime;
 }
 
-extern unsigned long to_ratio(u64 period, u64 runtime);
-
 void init_dl_bw(struct dl_bw *dl_b)
 {
 	raw_spin_lock_init(&dl_b->lock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9855e87..fea7d33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1017,7 +1017,7 @@
 static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long power_of(int cpu);
+static unsigned long capacity_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
 /* Cached statistics for all CPUs within a node */
@@ -1026,11 +1026,11 @@
 	unsigned long load;
 
 	/* Total compute capacity of CPUs on a node */
-	unsigned long power;
+	unsigned long compute_capacity;
 
 	/* Approximate capacity in terms of runnable tasks on a node */
-	unsigned long capacity;
-	int has_capacity;
+	unsigned long task_capacity;
+	int has_free_capacity;
 };
 
 /*
@@ -1046,7 +1046,7 @@
 
 		ns->nr_running += rq->nr_running;
 		ns->load += weighted_cpuload(cpu);
-		ns->power += power_of(cpu);
+		ns->compute_capacity += capacity_of(cpu);
 
 		cpus++;
 	}
@@ -1056,15 +1056,16 @@
 	 * the @ns structure is NULL'ed and task_numa_compare() will
 	 * not find this node attractive.
 	 *
-	 * We'll either bail at !has_capacity, or we'll detect a huge imbalance
-	 * and bail there.
+	 * We'll either bail at !has_free_capacity, or we'll detect a huge
+	 * imbalance and bail there.
 	 */
 	if (!cpus)
 		return;
 
-	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
-	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
-	ns->has_capacity = (ns->nr_running < ns->capacity);
+	ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
+	ns->task_capacity =
+		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
+	ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
 }
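For instance, a node with four unscaled CPUs sums compute_capacity to
4 * 1024 = 4096, task_capacity rounds to 4, and the node advertises free
capacity whenever fewer than four tasks are runnable on it.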
 
 struct task_numa_env {
@@ -1195,8 +1196,8 @@
 
 	if (!cur) {
 		/* Is there capacity at our destination? */
-		if (env->src_stats.has_capacity &&
-		    !env->dst_stats.has_capacity)
+		if (env->src_stats.has_free_capacity &&
+		    !env->dst_stats.has_free_capacity)
 			goto unlock;
 
 		goto balance;
@@ -1213,7 +1214,7 @@
 	orig_dst_load = env->dst_stats.load;
 	orig_src_load = env->src_stats.load;
 
-	/* XXX missing power terms */
+	/* XXX missing capacity terms */
 	load = task_h_load(env->p);
 	dst_load = orig_dst_load + load;
 	src_load = orig_src_load - load;
@@ -1301,8 +1302,8 @@
 	groupimp = group_weight(p, env.dst_nid) - groupweight;
 	update_numa_stats(&env.dst_stats, env.dst_nid);
 
-	/* If the preferred nid has capacity, try to use it. */
-	if (env.dst_stats.has_capacity)
+	/* If the preferred nid has free capacity, try to use it. */
+	if (env.dst_stats.has_free_capacity)
 		task_numa_find_cpu(&env, taskimp, groupimp);
 
 	/* No space available on the preferred nid. Look elsewhere. */
@@ -3225,10 +3226,12 @@
 	 * has not truly expired.
 	 *
 	 * Fortunately we can determine whether this is the case by checking
-	 * whether the global deadline has advanced.
+	 * whether the global deadline has advanced. It is valid to compare
+	 * cfs_b->runtime_expires without any locks since we only care about
+	 * exact equality, so a partial write will still work.
 	 */
 
-	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+	if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
 		/* extend local deadline, drift is bounded above by 2 ticks */
 		cfs_rq->runtime_expires += TICK_NSEC;
 	} else {
@@ -3457,21 +3460,21 @@
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
 	u64 runtime, runtime_expires;
-	int idle = 1, throttled;
+	int throttled;
 
-	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
-		goto out_unlock;
+		goto out_deactivate;
 
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-	/* idle depends on !throttled (for the case of a large deficit) */
-	idle = cfs_b->idle && !throttled;
 	cfs_b->nr_periods += overrun;
 
-	/* if we're going inactive then everything else can be deferred */
-	if (idle)
-		goto out_unlock;
+	/*
+	 * idle depends on !throttled (for the case of a large deficit), and if
+	 * we're going inactive then everything else can be deferred
+	 */
+	if (cfs_b->idle && !throttled)
+		goto out_deactivate;
 
 	/*
 	 * if we have relooped after returning idle once, we need to update our
@@ -3485,7 +3488,7 @@
 	if (!throttled) {
 		/* mark as potentially idle for the upcoming period */
 		cfs_b->idle = 1;
-		goto out_unlock;
+		return 0;
 	}
 
 	/* account preceding periods in which throttling occurred */
@@ -3525,12 +3528,12 @@
 	 * timer to remain active while there are any throttled entities.)
 	 */
 	cfs_b->idle = 0;
-out_unlock:
-	if (idle)
-		cfs_b->timer_active = 0;
-	raw_spin_unlock(&cfs_b->lock);
 
-	return idle;
+	return 0;
+
+out_deactivate:
+	cfs_b->timer_active = 0;
+	return 1;
 }
 
 /* a cfs_rq won't donate quota below this amount */
@@ -3707,6 +3710,7 @@
 	int overrun;
 	int idle = 0;
 
+	raw_spin_lock(&cfs_b->lock);
 	for (;;) {
 		now = hrtimer_cb_get_time(timer);
 		overrun = hrtimer_forward(timer, now, cfs_b->period);
@@ -3716,6 +3720,7 @@
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
@@ -3775,8 +3780,6 @@
 	struct cfs_rq *cfs_rq;
 
 	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -3784,7 +3787,7 @@
 		 * clock_task is not advancing so we just need to make sure
 		 * there's some valid quota amount
 		 */
-		cfs_rq->runtime_remaining = cfs_b->quota;
+		cfs_rq->runtime_remaining = 1;
 		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 	}
@@ -4041,9 +4044,9 @@
 	return max(rq->cpu_load[type-1], total);
 }
 
-static unsigned long power_of(int cpu)
+static unsigned long capacity_of(int cpu)
 {
-	return cpu_rq(cpu)->cpu_power;
+	return cpu_rq(cpu)->cpu_capacity;
 }
 
 static unsigned long cpu_avg_load_per_task(int cpu)
@@ -4065,7 +4068,7 @@
 	 * about the boundary; a really active task won't care
 	 * about the loss.
 	 */
-	if (jiffies > current->wakee_flip_decay_ts + HZ) {
+	if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
 		current->wakee_flips >>= 1;
 		current->wakee_flip_decay_ts = jiffies;
 	}
@@ -4286,12 +4289,12 @@
 		s64 this_eff_load, prev_eff_load;
 
 		this_eff_load = 100;
-		this_eff_load *= power_of(prev_cpu);
+		this_eff_load *= capacity_of(prev_cpu);
 		this_eff_load *= this_load +
 			effective_load(tg, this_cpu, weight, weight);
 
 		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-		prev_eff_load *= power_of(this_cpu);
+		prev_eff_load *= capacity_of(this_cpu);
 		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
 
 		balanced = this_eff_load <= prev_eff_load;
@@ -4367,8 +4370,8 @@
 			avg_load += load;
 		}
 
-		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
+		/* Adjust by relative CPU capacity of the group */
+		avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -4948,14 +4951,14 @@
  *
  *   W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0               (3)
  *
- * P_i is the cpu power (or compute capacity) of cpu i, typically it is the
+ * C_i is the compute capacity of cpu i, typically it is the
  * fraction of 'recent' time available for SCHED_OTHER task execution. But it
  * can also include other factors [XXX].
  *
  * To achieve this balance we define a measure of imbalance which follows
  * directly from (1):
  *
- *   imb_i,j = max{ avg(W/P), W_i/P_i } - min{ avg(W/P), W_j/P_j }    (4)
+ *   imb_i,j = max{ avg(W/C), W_i/C_i } - min{ avg(W/C), W_j/C_j }    (4)
  *
  * We then move tasks around to minimize the imbalance. In the continuous
  * function space it is obvious this converges, in the discrete case we get
@@ -5530,13 +5533,13 @@
 	unsigned long group_load; /* Total load over the CPUs of the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
-	unsigned long group_power;
+	unsigned long group_capacity;
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
-	unsigned int group_capacity;
+	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
 	unsigned int group_weight;
 	int group_imb; /* Is there an imbalance in the group ? */
-	int group_has_capacity; /* Is there extra capacity in the group? */
+	int group_has_free_capacity; /* Is there free capacity in the group? */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -5551,7 +5554,7 @@
 	struct sched_group *busiest;	/* Busiest group in this sd */
 	struct sched_group *local;	/* Local group in this sd */
 	unsigned long total_load;	/* Total load of all groups in sd */
-	unsigned long total_pwr;	/* Total power of all groups in sd */
+	unsigned long total_capacity;	/* Total capacity of all groups in sd */
 	unsigned long avg_load;	/* Average load across all groups in sd */
 
 	struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -5570,7 +5573,7 @@
 		.busiest = NULL,
 		.local = NULL,
 		.total_load = 0UL,
-		.total_pwr = 0UL,
+		.total_capacity = 0UL,
 		.busiest_stat = {
 			.avg_load = 0UL,
 		},
@@ -5605,17 +5608,17 @@
 	return load_idx;
 }
 
-static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
 {
-	return SCHED_POWER_SCALE;
+	return SCHED_CAPACITY_SCALE;
 }
 
-unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
-	return default_scale_freq_power(sd, cpu);
+	return default_scale_capacity(sd, cpu);
 }
 
-static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
 	unsigned long smt_gain = sd->smt_gain;
@@ -5625,12 +5628,12 @@
 	return smt_gain;
 }
 
-unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
-	return default_scale_smt_power(sd, cpu);
+	return default_scale_smt_capacity(sd, cpu);
 }
 
-static unsigned long scale_rt_power(int cpu)
+static unsigned long scale_rt_capacity(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	u64 total, available, age_stamp, avg;
@@ -5650,71 +5653,71 @@
 	total = sched_avg_period() + delta;
 
 	if (unlikely(total < avg)) {
-		/* Ensures that power won't end up being negative */
+		/* Ensures that capacity won't end up being negative */
 		available = 0;
 	} else {
 		available = total - avg;
 	}
 
-	if (unlikely((s64)total < SCHED_POWER_SCALE))
-		total = SCHED_POWER_SCALE;
+	if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
+		total = SCHED_CAPACITY_SCALE;
 
-	total >>= SCHED_POWER_SHIFT;
+	total >>= SCHED_CAPACITY_SHIFT;
 
 	return div_u64(available, total);
 }
 
-static void update_cpu_power(struct sched_domain *sd, int cpu)
+static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
-	unsigned long power = SCHED_POWER_SCALE;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		if (sched_feat(ARCH_POWER))
-			power *= arch_scale_smt_power(sd, cpu);
+	if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) {
+		if (sched_feat(ARCH_CAPACITY))
+			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
-			power *= default_scale_smt_power(sd, cpu);
+			capacity *= default_scale_smt_capacity(sd, cpu);
 
-		power >>= SCHED_POWER_SHIFT;
+		capacity >>= SCHED_CAPACITY_SHIFT;
 	}
 
-	sdg->sgp->power_orig = power;
+	sdg->sgc->capacity_orig = capacity;
 
-	if (sched_feat(ARCH_POWER))
-		power *= arch_scale_freq_power(sd, cpu);
+	if (sched_feat(ARCH_CAPACITY))
+		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
-		power *= default_scale_freq_power(sd, cpu);
+		capacity *= default_scale_capacity(sd, cpu);
 
-	power >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
-	power *= scale_rt_power(cpu);
-	power >>= SCHED_POWER_SHIFT;
+	capacity *= scale_rt_capacity(cpu);
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
-	if (!power)
-		power = 1;
+	if (!capacity)
+		capacity = 1;
 
-	cpu_rq(cpu)->cpu_power = power;
-	sdg->sgp->power = power;
+	cpu_rq(cpu)->cpu_capacity = capacity;
+	sdg->sgc->capacity = capacity;
 }
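Each scaling step multiplies by a factor out of SCHED_CAPACITY_SCALE (1024)
and shifts back down by SCHED_CAPACITY_SHIFT. A worked example with
illustrative inputs (an SMT thread in a 2-way core, the default smt_gain of
1178, and roughly 10% of recent time consumed by RT/IRQ work):

	/* Illustrative only: mirrors the arithmetic in update_cpu_capacity(). */
	static unsigned long example_cpu_capacity(void)
	{
		unsigned long capacity = 1024;		  /* SCHED_CAPACITY_SCALE */

		capacity = (capacity * (1178 / 2)) >> 10; /* SMT: smt_gain / weight */
		capacity = (capacity * 1024) >> 10;	  /* default freq scale: no-op */
		capacity = (capacity * 921) >> 10;	  /* ~90% left after RT/IRQ */

		return capacity;			  /* ~529 out of 1024 */
	}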
 
-void update_group_power(struct sched_domain *sd, int cpu)
+void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long power, power_orig;
+	unsigned long capacity, capacity_orig;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
 	interval = clamp(interval, 1UL, max_load_balance_interval);
-	sdg->sgp->next_update = jiffies + interval;
+	sdg->sgc->next_update = jiffies + interval;
 
 	if (!child) {
-		update_cpu_power(sd, cpu);
+		update_cpu_capacity(sd, cpu);
 		return;
 	}
 
-	power_orig = power = 0;
+	capacity_orig = capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -5723,31 +5726,31 @@
 		 */
 
 		for_each_cpu(cpu, sched_group_cpus(sdg)) {
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
 			/*
-			 * build_sched_domains() -> init_sched_groups_power()
+			 * build_sched_domains() -> init_sched_groups_capacity()
 			 * gets here before we've attached the domains to the
 			 * runqueues.
 			 *
-			 * Use power_of(), which is set irrespective of domains
-			 * in update_cpu_power().
+			 * Use capacity_of(), which is set irrespective of domains
+			 * in update_cpu_capacity().
 			 *
-			 * This avoids power/power_orig from being 0 and
+			 * This avoids capacity/capacity_orig from being 0 and
 			 * causing divide-by-zero issues on boot.
 			 *
-			 * Runtime updates will correct power_orig.
+			 * Runtime updates will correct capacity_orig.
 			 */
 			if (unlikely(!rq->sd)) {
-				power_orig += power_of(cpu);
-				power += power_of(cpu);
+				capacity_orig += capacity_of(cpu);
+				capacity += capacity_of(cpu);
 				continue;
 			}
 
-			sgp = rq->sd->groups->sgp;
-			power_orig += sgp->power_orig;
-			power += sgp->power;
+			sgc = rq->sd->groups->sgc;
+			capacity_orig += sgc->capacity_orig;
+			capacity += sgc->capacity;
 		}
 	} else  {
 		/*
@@ -5757,14 +5760,14 @@
 
 		group = child->groups;
 		do {
-			power_orig += group->sgp->power_orig;
-			power += group->sgp->power;
+			capacity_orig += group->sgc->capacity_orig;
+			capacity += group->sgc->capacity;
 			group = group->next;
 		} while (group != child->groups);
 	}
 
-	sdg->sgp->power_orig = power_orig;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity_orig = capacity_orig;
+	sdg->sgc->capacity = capacity;
 }
 
 /*
@@ -5778,15 +5781,15 @@
 fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 {
 	/*
-	 * Only siblings can have significantly less than SCHED_POWER_SCALE
+	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
-	if (!(sd->flags & SD_SHARE_CPUPOWER))
+	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
 		return 0;
 
 	/*
-	 * If ~90% of the cpu_power is still there, we're good.
+	 * If ~90% of the cpu_capacity is still there, we're good.
 	 */
-	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
+	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
 		return 1;
 
 	return 0;
@@ -5823,34 +5826,35 @@
 
 static inline int sg_imbalanced(struct sched_group *group)
 {
-	return group->sgp->imbalance;
+	return group->sgc->imbalance;
 }
 
 /*
- * Compute the group capacity.
+ * Compute the group capacity factor.
  *
- * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by
+ * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
  * first dividing out the smt factor and computing the actual number of cores
- * and limit power unit capacity with that.
+ * and limit unit capacity with that.
  */
-static inline int sg_capacity(struct lb_env *env, struct sched_group *group)
+static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
 {
-	unsigned int capacity, smt, cpus;
-	unsigned int power, power_orig;
+	unsigned int capacity_factor, smt, cpus;
+	unsigned int capacity, capacity_orig;
 
-	power = group->sgp->power;
-	power_orig = group->sgp->power_orig;
+	capacity = group->sgc->capacity;
+	capacity_orig = group->sgc->capacity_orig;
 	cpus = group->group_weight;
 
-	/* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig);
-	capacity = cpus / smt; /* cores */
+	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
+	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
+	capacity_factor = cpus / smt; /* cores */
 
-	capacity = min_t(unsigned, capacity, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE));
-	if (!capacity)
-		capacity = fix_small_capacity(env->sd, group);
+	capacity_factor = min_t(unsigned,
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
+	if (!capacity_factor)
+		capacity_factor = fix_small_capacity(env->sd, group);
 
-	return capacity;
+	return capacity_factor;
 }
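Concretely (illustrative numbers): a 2-thread SMT core reports a
capacity_orig of about 1178, so smt = DIV_ROUND_UP(1024 * 2, 1178) = 2 and
capacity_factor = 2 / 2 = 1 core, instead of the two 'phantom' cores a naive
per-thread rounding would yield; the min() then keeps it at
DIV_ROUND_CLOSEST(1178, 1024) = 1.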
 
 /**
@@ -5890,9 +5894,9 @@
 			sgs->idle_cpus++;
 	}
 
-	/* Adjust by relative CPU power of the group */
-	sgs->group_power = group->sgp->power;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power;
+	/* Adjust by relative CPU capacity of the group */
+	sgs->group_capacity = group->sgc->capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -5900,10 +5904,10 @@
 	sgs->group_weight = group->group_weight;
 
 	sgs->group_imb = sg_imbalanced(group);
-	sgs->group_capacity = sg_capacity(env, group);
+	sgs->group_capacity_factor = sg_capacity_factor(env, group);
 
-	if (sgs->group_capacity > sgs->sum_nr_running)
-		sgs->group_has_capacity = 1;
+	if (sgs->group_capacity_factor > sgs->sum_nr_running)
+		sgs->group_has_free_capacity = 1;
 }
 
 /**
@@ -5927,7 +5931,7 @@
 	if (sgs->avg_load <= sds->busiest_stat.avg_load)
 		return false;
 
-	if (sgs->sum_nr_running > sgs->group_capacity)
+	if (sgs->sum_nr_running > sgs->group_capacity_factor)
 		return true;
 
 	if (sgs->group_imb)
@@ -6007,8 +6011,8 @@
 			sgs = &sds->local_stat;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
-			    time_after_eq(jiffies, sg->sgp->next_update))
-				update_group_power(env->sd, env->dst_cpu);
+			    time_after_eq(jiffies, sg->sgc->next_update))
+				update_group_capacity(env->sd, env->dst_cpu);
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
@@ -6018,17 +6022,17 @@
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
-		 * first, lower the sg capacity to one so that we'll try
+		 * first, lower the sg capacity factor to one so that we'll try
 		 * and move all the excess tasks away. We lower the capacity
 		 * of a group only if the local group has the capacity to fit
-		 * these excess tasks, i.e. nr_running < group_capacity. The
+		 * these excess tasks, i.e. nr_running < group_capacity_factor. The
 		 * extra check prevents the case where you always pull from the
 		 * heaviest group when it is already under-utilized (possible
 		 * with a large weight task outweighs the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    sds->local_stat.group_has_capacity)
-			sgs->group_capacity = min(sgs->group_capacity, 1U);
+		    sds->local_stat.group_has_free_capacity)
+			sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
 
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
@@ -6038,7 +6042,7 @@
 next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
-		sds->total_pwr += sgs->group_power;
+		sds->total_capacity += sgs->group_capacity;
 
 		sg = sg->next;
 	} while (sg != env->sd->groups);
@@ -6085,8 +6089,8 @@
 		return 0;
 
 	env->imbalance = DIV_ROUND_CLOSEST(
-		sds->busiest_stat.avg_load * sds->busiest_stat.group_power,
-		SCHED_POWER_SCALE);
+		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
+		SCHED_CAPACITY_SCALE);
 
 	return 1;
 }
@@ -6101,7 +6105,7 @@
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	unsigned long tmp, pwr_now = 0, pwr_move = 0;
+	unsigned long tmp, capa_now = 0, capa_move = 0;
 	unsigned int imbn = 2;
 	unsigned long scaled_busy_load_per_task;
 	struct sg_lb_stats *local, *busiest;
@@ -6115,8 +6119,8 @@
 		imbn = 1;
 
 	scaled_busy_load_per_task =
-		(busiest->load_per_task * SCHED_POWER_SCALE) /
-		busiest->group_power;
+		(busiest->load_per_task * SCHED_CAPACITY_SCALE) /
+		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
 	    local->avg_load + (scaled_busy_load_per_task * imbn)) {
@@ -6126,38 +6130,38 @@
 
 	/*
 	 * OK, we don't have enough imbalance to justify moving tasks,
-	 * however we may be able to increase total CPU power used by
+	 * however we may be able to increase total CPU capacity used by
 	 * moving them.
 	 */
 
-	pwr_now += busiest->group_power *
+	capa_now += busiest->group_capacity *
 			min(busiest->load_per_task, busiest->avg_load);
-	pwr_now += local->group_power *
+	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	pwr_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_CAPACITY_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
-		pwr_move += busiest->group_power *
+		capa_move += busiest->group_capacity *
 			    min(busiest->load_per_task,
 				busiest->avg_load - scaled_busy_load_per_task);
 	}
 
 	/* Amount of load we'd add */
-	if (busiest->avg_load * busiest->group_power <
-	    busiest->load_per_task * SCHED_POWER_SCALE) {
-		tmp = (busiest->avg_load * busiest->group_power) /
-		      local->group_power;
+	if (busiest->avg_load * busiest->group_capacity <
+	    busiest->load_per_task * SCHED_CAPACITY_SCALE) {
+		tmp = (busiest->avg_load * busiest->group_capacity) /
+		      local->group_capacity;
 	} else {
-		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
-		      local->group_power;
+		tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
+		      local->group_capacity;
 	}
-	pwr_move += local->group_power *
+	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	pwr_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_CAPACITY_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move > pwr_now)
+	if (capa_move > capa_now)
 		env->imbalance = busiest->load_per_task;
 }
 
@@ -6187,7 +6191,7 @@
 	/*
 	 * In the presence of smp nice balancing, certain scenarios can have
 	 * max load less than avg load (as we skip the groups at or below
-	 * its cpu_power, while calculating max_load..)
+	 * its cpu_capacity, while calculating max_load..)
 	 */
 	if (busiest->avg_load <= sds->avg_load ||
 	    local->avg_load >= sds->avg_load) {
@@ -6202,10 +6206,10 @@
 		 * have to drop below capacity to reach cpu-load equilibrium.
 		 */
 		load_above_capacity =
-			(busiest->sum_nr_running - busiest->group_capacity);
+			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
-		load_above_capacity /= busiest->group_power;
+		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
+		load_above_capacity /= busiest->group_capacity;
 	}
 
 	/*
@@ -6220,9 +6224,9 @@
 
 	/* How much load to actually move to equalise the imbalance */
 	env->imbalance = min(
-		max_pull * busiest->group_power,
-		(sds->avg_load - local->avg_load) * local->group_power
-	) / SCHED_POWER_SCALE;
+		max_pull * busiest->group_capacity,
+		(sds->avg_load - local->avg_load) * local->group_capacity
+	) / SCHED_CAPACITY_SCALE;
 
 	/*
 	 * if *imbalance is less than the average load per runnable task
@@ -6276,7 +6280,8 @@
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
+	sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
+						/ sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6287,8 +6292,8 @@
 		goto force_balance;
 
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-	if (env->idle == CPU_NEWLY_IDLE && local->group_has_capacity &&
-	    !busiest->group_has_capacity)
+	if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
+	    !busiest->group_has_free_capacity)
 		goto force_balance;
 
 	/*
@@ -6342,11 +6347,11 @@
 				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
-	unsigned long busiest_load = 0, busiest_power = 1;
+	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-		unsigned long power, capacity, wl;
+		unsigned long capacity, capacity_factor, wl;
 		enum fbq_type rt;
 
 		rq = cpu_rq(i);
@@ -6374,34 +6379,34 @@
 		if (rt > env->fbq_type)
 			continue;
 
-		power = power_of(i);
-		capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-		if (!capacity)
-			capacity = fix_small_capacity(env->sd, group);
+		capacity = capacity_of(i);
+		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
+		if (!capacity_factor)
+			capacity_factor = fix_small_capacity(env->sd, group);
 
 		wl = weighted_cpuload(i);
 
 		/*
 		 * When comparing with imbalance, use weighted_cpuload()
-		 * which is not scaled with the cpu power.
+		 * which is not scaled with the cpu capacity.
 		 */
-		if (capacity && rq->nr_running == 1 && wl > env->imbalance)
+		if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
 			continue;
 
 		/*
 		 * For the load comparisons with the other cpu's, consider
-		 * the weighted_cpuload() scaled with the cpu power, so that
-		 * the load can be moved away from the cpu that is potentially
-		 * running at a lower capacity.
+		 * the weighted_cpuload() scaled with the cpu capacity, so
+		 * that the load can be moved away from the cpu that is
+		 * potentially running at a lower capacity.
 		 *
-		 * Thus we're looking for max(wl_i / power_i), crosswise
+		 * Thus we're looking for max(wl_i / capacity_i), crosswise
 		 * multiplication to rid ourselves of the division works out
-		 * to: wl_i * power_j > wl_j * power_i;  where j is our
-		 * previous maximum.
+		 * to: wl_i * capacity_j > wl_j * capacity_i;  where j is
+		 * our previous maximum.
 		 */
-		if (wl * busiest_power > busiest_load * power) {
+		if (wl * busiest_capacity > busiest_load * capacity) {
 			busiest_load = wl;
-			busiest_power = power;
+			busiest_capacity = capacity;
 			busiest = rq;
 		}
 	}
@@ -6609,7 +6614,7 @@
 		 * We failed to reach balance because of affinity.
 		 */
 		if (sd_parent) {
-			int *group_imbalance = &sd_parent->groups->sgp->imbalance;
+			int *group_imbalance = &sd_parent->groups->sgc->imbalance;
 
 			if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
 				*group_imbalance = 1;
@@ -6996,7 +7001,7 @@
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7013,7 +7018,7 @@
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7192,12 +7197,17 @@
 
 		rq = cpu_rq(balance_cpu);
 
-		raw_spin_lock_irq(&rq->lock);
-		update_rq_clock(rq);
-		update_idle_cpu_load(rq);
-		raw_spin_unlock_irq(&rq->lock);
-
-		rebalance_domains(rq, CPU_IDLE);
+		/*
+		 * If the next balance is due, do the balance.
+		 */
+		if (time_after_eq(jiffies, rq->next_balance)) {
+			raw_spin_lock_irq(&rq->lock);
+			update_rq_clock(rq);
+			update_idle_cpu_load(rq);
+			raw_spin_unlock_irq(&rq->lock);
+			rebalance_domains(rq, CPU_IDLE);
+		}
 
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
@@ -7212,7 +7222,7 @@
  * of an idle cpu is the system.
  *   - This rq has more than one task.
  *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's power.
+ *     busy cpu's exceeding the group's capacity.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
@@ -7220,7 +7230,7 @@
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
 
 	if (unlikely(rq->idle_balance))
@@ -7250,8 +7260,8 @@
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (sd) {
-		sgp = sd->groups->sgp;
-		nr_busy = atomic_read(&sgp->nr_busy_cpus);
+		sgc = sd->groups->sgc;
+		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
 		if (nr_busy > 1)
 			goto need_kick_unlock;
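
The find_busiest_queue() hunk above replaces the division wl_i / capacity_i with a crosswise multiplication, since integer division would truncate away exactly the precision the comparison needs. A minimal userspace sketch of the same comparison (names and values here are illustrative, not from the patch):

    #include <stdio.h>

    /*
     * wl_i / cap_i > wl_j / cap_j  <=>  wl_i * cap_j > wl_j * cap_i
     * for non-negative operands; the products must not overflow.
     */
    static int busier(unsigned long long wl_i, unsigned long long cap_i,
                      unsigned long long wl_j, unsigned long long cap_j)
    {
        return wl_i * cap_j > wl_j * cap_i;
    }

    int main(void)
    {
        /* Integer division would say 300/1024 == 200/512 == 0. */
        printf("%d\n", busier(300, 1024, 200, 512)); /* 0: 300/1024 < 200/512 */
        printf("%d\n", busier(500, 1024, 200, 512)); /* 1: 500/1024 > 200/512 */
        return 0;
    }
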
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5716929..90284d1 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -37,18 +37,18 @@
 SCHED_FEAT(WAKEUP_PREEMPTION, true)
 
 /*
- * Use arch dependent cpu power functions
+ * Use arch dependent cpu capacity functions
  */
-SCHED_FEAT(ARCH_POWER, true)
+SCHED_FEAT(ARCH_CAPACITY, true)
 
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Decrement CPU power based on time not spent running tasks
+ * Decrement CPU capacity based on time not spent running tasks
  */
-SCHED_FEAT(NONTASK_POWER, true)
+SCHED_FEAT(NONTASK_CAPACITY, true)
 
 /*
  * Queue remote wakeups on the target CPU and process them
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 25b9423..cf009fb 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -12,6 +12,8 @@
 
 #include <trace/events/power.h>
 
+#include "sched.h"
+
 static int __read_mostly cpu_idle_force_poll;
 
 void cpu_idle_poll_ctrl(bool enable)
@@ -67,6 +69,10 @@
  * cpuidle_idle_call - the main idle function
  *
  * NOTE: no locks or semaphores should be used here
+ *
+ * On archs that support TIF_POLLING_NRFLAG, this function is called
+ * with polling set, and it returns with polling set.  If it ever
+ * stops polling, it must clear the polling bit.
  */
 static void cpuidle_idle_call(void)
 {
@@ -175,10 +181,22 @@
 
 /*
  * Generic idle loop implementation
+ *
+ * Called with polling cleared.
  */
 static void cpu_idle_loop(void)
 {
 	while (1) {
+		/*
+		 * If the arch has a polling bit, we maintain an invariant:
+		 *
+		 * Our polling bit is clear if we're not scheduled (i.e. if
+		 * rq->curr != rq->idle).  This means that, if rq->idle has
+		 * the polling bit set, then setting need_resched is
+		 * guaranteed to cause the cpu to reschedule.
+		 */
+
+		__current_set_polling();
 		tick_nohz_idle_enter();
 
 		while (!need_resched()) {
@@ -218,6 +236,17 @@
 		 */
 		preempt_set_need_resched();
 		tick_nohz_idle_exit();
+		__current_clr_polling();
+
+		/*
+		 * We promise to call sched_ttwu_pending and reschedule
+		 * if need_resched is set while polling is set.  That
+		 * means that clearing polling needs to be visible
+		 * before doing these things.
+		 */
+		smp_mb__after_atomic();
+
+		sched_ttwu_pending();
 		schedule_preempt_disabled();
 	}
 }
@@ -239,7 +268,6 @@
 	 */
 	boot_init_stack_canary();
 #endif
-	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }
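
The idle-loop changes above hinge on an ordering invariant: the polling bit is cleared, with a full barrier, before pending wakeups are checked, so a waker that still sees polling set can skip the IPI. A rough userspace model of that handshake, using C11 atomics in place of TIF_POLLING_NRFLAG and smp_mb__after_atomic() (all names below are invented for illustration):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool polling;       /* stands in for TIF_POLLING_NRFLAG */
    static atomic_bool need_resched;  /* stands in for the resched flag */

    /* Waker: set the flag, and only interrupt a target that is not polling. */
    static bool set_nr_and_check_polling(void)
    {
        atomic_store(&need_resched, true);
        return !atomic_load(&polling); /* true => an IPI is still required */
    }

    /* Idle task leaving the loop: clear polling before looking for work. */
    static void idle_exit(void)
    {
        atomic_store(&polling, false);
        atomic_thread_fence(memory_order_seq_cst); /* smp_mb__after_atomic() */
        if (atomic_load(&need_resched)) {
            /* run pending wakeups (sched_ttwu_pending), then schedule */
        }
    }
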
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b3512f1..a490831 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -918,7 +918,6 @@
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_rt_entity *rt_se = &curr->rt;
-	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	u64 delta_exec;
 
 	if (curr->sched_class != &rt_sched_class)
@@ -943,7 +942,7 @@
 		return;
 
 	for_each_sched_rt_entity(rt_se) {
-		rt_rq = rt_rq_of_se(rt_se);
+		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 			raw_spin_lock(&rt_rq->rt_runtime_lock);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e47679b..31cc02e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -567,7 +567,7 @@
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
-	unsigned long cpu_power;
+	unsigned long cpu_capacity;
 
 	unsigned char idle_balance;
 	/* For active balancing */
@@ -670,6 +670,8 @@
 
 #ifdef CONFIG_SMP
 
+extern void sched_ttwu_pending(void);
+
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
 			      lockdep_is_held(&sched_domains_mutex))
@@ -728,15 +730,15 @@
 DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
-struct sched_group_power {
+struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-	 * single CPU.
+	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+	 * for a single CPU.
 	 */
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 	unsigned long next_update;
-	int imbalance; /* XXX unrelated to power but shared group state */
+	int imbalance; /* XXX unrelated to capacity but shared group state */
 	/*
 	 * Number of busy cpus in this group.
 	 */
@@ -750,7 +752,7 @@
 	atomic_t ref;
 
 	unsigned int group_weight;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 
 	/*
 	 * The CPUs this group covers.
@@ -773,7 +775,7 @@
  */
 static inline struct cpumask *sched_group_mask(struct sched_group *sg)
 {
-	return to_cpumask(sg->sgp->cpumask);
+	return to_cpumask(sg->sgc->cpumask);
 }
 
 /**
@@ -787,6 +789,10 @@
 
 extern int group_balance_cpu(struct sched_group *sg);
 
+#else
+
+static inline void sched_ttwu_pending(void) { }
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
@@ -1167,7 +1173,7 @@
 
 #ifdef CONFIG_SMP
 
-extern void update_group_power(struct sched_domain *sd, int cpu);
+extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c634868..7c56c3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -543,7 +543,7 @@
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -ENODEV;
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
@@ -591,6 +593,7 @@
 		schedule();
 
 	finish_wait(&work->waiters, &wait);
+	return 0;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 16f7038..384ede3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1085,13 +1085,13 @@
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static void wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
-		return;
+		return 0;
 
-	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1338,7 +1338,7 @@
 {
 	int ret;
 
-	savedcmd = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL);
+	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
 	if (!savedcmd)
 		return -ENOMEM;
 
@@ -3840,7 +3840,7 @@
 	int r;
 
 	arch_spin_lock(&trace_cmdline_lock);
-	r = sprintf(buf, "%u\n", savedcmd->cmdline_num);
+	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
 	arch_spin_unlock(&trace_cmdline_lock);
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3857,7 +3857,7 @@
 {
 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
 
-	s = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL);
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return -ENOMEM;
 
@@ -4378,6 +4378,7 @@
 static int tracing_wait_pipe(struct file *filp)
 {
 	struct trace_iterator *iter = filp->private_data;
+	int ret;
 
 	while (trace_empty(iter)) {
 
@@ -4399,10 +4400,13 @@
 
 		mutex_unlock(&iter->mutex);
 
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 
 		mutex_lock(&iter->mutex);
 
+		if (ret)
+			return ret;
+
 		if (signal_pending(current))
 			return -EINTR;
 	}
@@ -5327,8 +5331,12 @@
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			wait_on_pipe(iter);
+			ret = wait_on_pipe(iter);
 			mutex_lock(&trace_types_lock);
+			if (ret) {
+				size = ret;
+				goto out_unlock;
+			}
 			if (signal_pending(current)) {
 				size = -EINTR;
 				goto out_unlock;
@@ -5538,8 +5546,10 @@
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 		mutex_lock(&trace_types_lock);
+		if (ret)
+			goto out;
 		if (signal_pending(current)) {
 			ret = -EINTR;
 			goto out;
@@ -6232,22 +6242,25 @@
 	return 0;
 }
 
+static void free_trace_buffer(struct trace_buffer *buf)
+{
+	if (buf->buffer) {
+		ring_buffer_free(buf->buffer);
+		buf->buffer = NULL;
+		free_percpu(buf->data);
+		buf->data = NULL;
+	}
+}
+
 static void free_trace_buffers(struct trace_array *tr)
 {
 	if (!tr)
 		return;
 
-	if (tr->trace_buffer.buffer) {
-		ring_buffer_free(tr->trace_buffer.buffer);
-		tr->trace_buffer.buffer = NULL;
-		free_percpu(tr->trace_buffer.data);
-	}
+	free_trace_buffer(&tr->trace_buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	if (tr->max_buffer.buffer) {
-		ring_buffer_free(tr->max_buffer.buffer);
-		tr->max_buffer.buffer = NULL;
-	}
+	free_trace_buffer(&tr->max_buffer);
 #endif
 }
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9e82551..9258f5a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -252,7 +252,7 @@
 {
 	struct trace_array *tr;
 
-	if (list_empty(ftrace_trace_arrays.prev))
+	if (list_empty(&ftrace_trace_arrays))
 		return NULL;
 
 	tr = list_entry(ftrace_trace_arrays.prev,
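
The trace.h change above fixes the argument to list_empty(): the old code passed ftrace_trace_arrays.prev, i.e. a pointer to the last node rather than to the list head. On a well-formed list the two calls happen to agree, since only an empty list's head points at itself, but the old form tests the wrong node and obscures the intent. For reference, the kernel's definition:

    struct list_head {
        struct list_head *next, *prev;
    };

    /* A circular list is empty exactly when the head points at itself. */
    static inline int list_empty(const struct list_head *head)
    {
        return head->next == head;
    }
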
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index c894614..5d12bb4 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -248,8 +248,8 @@
 	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
-__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
-				       struct pt_regs *regs, int *rctxp)
+void *perf_trace_buf_prepare(int size, unsigned short type,
+			     struct pt_regs *regs, int *rctxp)
 {
 	struct trace_entry *entry;
 	unsigned long flags;
@@ -281,6 +281,7 @@
 	return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare);
 
 #ifdef CONFIG_FUNCTION_TRACER
 static void
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ef2fba1..282f6e4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -40,27 +40,27 @@
 	(sizeof(struct probe_arg) * (n)))
 
 
-static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
 	return tk->rp.handler != NULL;
 }
 
-static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk)
+static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
 {
 	return tk->symbol ? tk->symbol : "unknown";
 }
 
-static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
+static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
 {
 	return tk->rp.kp.offset;
 }
 
-static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 {
 	return !!(kprobe_gone(&tk->rp.kp));
 }
 
-static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
+static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
 						 struct module *mod)
 {
 	int len = strlen(mod->name);
@@ -68,7 +68,7 @@
 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
 
-static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
 {
 	return !!strchr(trace_kprobe_symbol(tk), ':');
 }
@@ -132,19 +132,21 @@
  * Kprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)					\
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
 					  void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
 				(unsigned int)((unsigned long)offset));	\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
+
 DEFINE_BASIC_FETCH_FUNCS(stack)
 /* No string on the stack entry */
 #define fetch_stack_string	NULL
 #define fetch_stack_string_size	NULL
 
 #define DEFINE_FETCH_memory(type)					\
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
 					  void *addr, void *dest)	\
 {									\
 	type retval;							\
@@ -152,14 +154,16 @@
 		*(type *)dest = 0;					\
 	else								\
 		*(type *)dest = retval;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
+
 DEFINE_BASIC_FETCH_FUNCS(memory)
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+					    void *addr, void *dest)
 {
 	long ret;
 	int maxlen = get_rloc_len(*(u32 *)dest);
@@ -193,10 +197,11 @@
 					      get_rloc_offs(*(u32 *)dest));
 	}
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
 
 /* Return the length of string -- including null terminal byte */
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-							void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+						 void *addr, void *dest)
 {
 	mm_segment_t old_fs;
 	int ret, len = 0;
@@ -219,17 +224,19 @@
 	else
 		*(u32 *)dest = len;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
 
 #define DEFINE_FETCH_symbol(type)					\
-__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,	\
-					  void *data, void *dest)	\
+void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
 {									\
 	struct symbol_cache *sc = data;					\
 	if (sc->addr)							\
 		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
 	else								\
 		*(type *)dest = 0;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
+
 DEFINE_BASIC_FETCH_FUNCS(symbol)
 DEFINE_FETCH_symbol(string)
 DEFINE_FETCH_symbol(string_size)
@@ -907,7 +914,7 @@
 };
 
 /* Kprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
 		    struct ftrace_event_file *ftrace_file)
 {
@@ -943,7 +950,7 @@
 					 entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct event_file_link *link;
@@ -951,9 +958,10 @@
 	list_for_each_entry_rcu(link, &tk->tp.files, list)
 		__kprobe_trace_func(tk, regs, link->file);
 }
+NOKPROBE_SYMBOL(kprobe_trace_func);
 
 /* Kretprobe handler */
-static __kprobes void
+static nokprobe_inline void
 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		       struct pt_regs *regs,
 		       struct ftrace_event_file *ftrace_file)
@@ -991,7 +999,7 @@
 					 entry, irq_flags, pc, regs);
 }
 
-static __kprobes void
+static void
 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		     struct pt_regs *regs)
 {
@@ -1000,6 +1008,7 @@
 	list_for_each_entry_rcu(link, &tk->tp.files, list)
 		__kretprobe_trace_func(tk, ri, regs, link->file);
 }
+NOKPROBE_SYMBOL(kretprobe_trace_func);
 
 /* Event entry printers */
 static enum print_line_t
@@ -1131,7 +1140,7 @@
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes void
+static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
@@ -1158,9 +1167,10 @@
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kprobe_perf_func);
 
 /* Kretprobe profile handler */
-static __kprobes void
+static void
 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		    struct pt_regs *regs)
 {
@@ -1188,6 +1198,7 @@
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 }
+NOKPROBE_SYMBOL(kretprobe_perf_func);
 #endif	/* CONFIG_PERF_EVENTS */
 
 /*
@@ -1196,9 +1207,8 @@
  * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
  * lockless, but we can't race with this __init function.
  */
-static __kprobes
-int kprobe_register(struct ftrace_event_call *event,
-		    enum trace_reg type, void *data)
+static int kprobe_register(struct ftrace_event_call *event,
+			   enum trace_reg type, void *data)
 {
 	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
 	struct ftrace_event_file *file = data;
@@ -1224,8 +1234,7 @@
 	return 0;
 }
 
-static __kprobes
-int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
@@ -1239,9 +1248,10 @@
 #endif
 	return 0;	/* We don't tweak the kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kprobe_dispatcher);
 
-static __kprobes
-int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+static int
+kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
@@ -1255,6 +1265,7 @@
 #endif
 	return 0;	/* We don't tweak the kernel, so just return 0 */
 }
+NOKPROBE_SYMBOL(kretprobe_dispatcher);
 
 static struct trace_event_functions kretprobe_funcs = {
 	.trace		= print_kretprobe_event
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 8364a42..d4b9fc2 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -37,13 +37,13 @@
 
 /* Printing  in basic type function template */
 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt)				\
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\
-						const char *name,	\
-						void *data, void *ent)	\
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\
+				void *data, void *ent)			\
 {									\
 	return trace_seq_printf(s, " %s=" fmt, name, *(type *)data);	\
 }									\
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
+const char PRINT_TYPE_FMT_NAME(type)[] = fmt;				\
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
 
 DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
@@ -55,9 +55,8 @@
 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
 
 /* Print type function for string type */
-__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
-						  const char *name,
-						  void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
+				 void *data, void *ent)
 {
 	int len = *(u32 *)data >> 16;
 
@@ -67,6 +66,7 @@
 		return trace_seq_printf(s, " %s=\"%s\"", name,
 					(const char *)get_loc_data(data, ent));
 }
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
 
 const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
 
@@ -81,23 +81,24 @@
 
 /* Data fetch function templates */
 #define DEFINE_FETCH_reg(type)						\
-__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,		\
-					void *offset, void *dest)	\
+void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)regs_get_register(regs,			\
 				(unsigned int)((unsigned long)offset));	\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
 DEFINE_BASIC_FETCH_FUNCS(reg)
 /* No string on the register */
 #define fetch_reg_string	NULL
 #define fetch_reg_string_size	NULL
 
 #define DEFINE_FETCH_retval(type)					\
-__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,	\
-					  void *dummy, void *dest)	\
+void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,		\
+				   void *dummy, void *dest)		\
 {									\
 	*(type *)dest = (type)regs_return_value(regs);			\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
 DEFINE_BASIC_FETCH_FUNCS(retval)
 /* No string on the retval */
 #define fetch_retval_string		NULL
@@ -112,8 +113,8 @@
 };
 
 #define DEFINE_FETCH_deref(type)					\
-__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,	\
-					    void *data, void *dest)	\
+void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,			\
+				  void *data, void *dest)		\
 {									\
 	struct deref_fetch_param *dprm = data;				\
 	unsigned long addr;						\
@@ -123,12 +124,13 @@
 		dprm->fetch(regs, (void *)addr, dest);			\
 	} else								\
 		*(type *)dest = 0;					\
-}
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
 DEFINE_BASIC_FETCH_FUNCS(deref)
 DEFINE_FETCH_deref(string)
 
-__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
-						   void *data, void *dest)
+void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
+					 void *data, void *dest)
 {
 	struct deref_fetch_param *dprm = data;
 	unsigned long addr;
@@ -140,16 +142,18 @@
 	} else
 		*(string_size *)dest = 0;
 }
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
 
-static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+static void update_deref_fetch_param(struct deref_fetch_param *data)
 {
 	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
 		update_deref_fetch_param(data->orig.data);
 	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
 		update_symbol_cache(data->orig.data);
 }
+NOKPROBE_SYMBOL(update_deref_fetch_param);
 
-static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
+static void free_deref_fetch_param(struct deref_fetch_param *data)
 {
 	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
 		free_deref_fetch_param(data->orig.data);
@@ -157,6 +161,7 @@
 		free_symbol_cache(data->orig.data);
 	kfree(data);
 }
+NOKPROBE_SYMBOL(free_deref_fetch_param);
 
 /* Bitfield fetch function */
 struct bitfield_fetch_param {
@@ -166,8 +171,8 @@
 };
 
 #define DEFINE_FETCH_bitfield(type)					\
-__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,	\
-					    void *data, void *dest)	\
+void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,		\
+				     void *data, void *dest)		\
 {									\
 	struct bitfield_fetch_param *bprm = data;			\
 	type buf = 0;							\
@@ -177,13 +182,13 @@
 		buf >>= bprm->low_shift;				\
 	}								\
 	*(type *)dest = buf;						\
-}
-
+}									\
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
 DEFINE_BASIC_FETCH_FUNCS(bitfield)
 #define fetch_bitfield_string		NULL
 #define fetch_bitfield_string_size	NULL
 
-static __kprobes void
+static void
 update_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
 	/*
@@ -196,7 +201,7 @@
 		update_symbol_cache(data->orig.data);
 }
 
-static __kprobes void
+static void
 free_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
 	/*
@@ -255,17 +260,17 @@
 }
 
 /* Special function : only accept unsigned long */
-static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs,
-						 void *dummy, void *dest)
+static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
 	*(unsigned long *)dest = kernel_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_kernel_stack_address);
 
-static __kprobes void fetch_user_stack_address(struct pt_regs *regs,
-					       void *dummy, void *dest)
+static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
 {
 	*(unsigned long *)dest = user_stack_pointer(regs);
 }
+NOKPROBE_SYMBOL(fetch_user_stack_address);
 
 static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
 					    fetch_func_t orig_fn,
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb1ab5d..4f815fb 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -81,13 +81,13 @@
  */
 #define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs))
 
-static inline void *get_rloc_data(u32 *dl)
+static nokprobe_inline void *get_rloc_data(u32 *dl)
 {
 	return (u8 *)dl + get_rloc_offs(*dl);
 }
 
 /* For data_loc conversion */
-static inline void *get_loc_data(u32 *dl, void *ent)
+static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
 {
 	return (u8 *)ent + get_rloc_offs(*dl);
 }
@@ -136,9 +136,8 @@
 
 /* Printing  in basic type function template */
 #define DECLARE_BASIC_PRINT_TYPE_FUNC(type)				\
-__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,		\
-					 const char *name,		\
-					 void *data, void *ent);	\
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\
+				void *data, void *ent);			\
 extern const char PRINT_TYPE_FMT_NAME(type)[]
 
 DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -303,7 +302,7 @@
 	return !!(tp->flags & TP_FLAG_REGISTERED);
 }
 
-static inline __kprobes void call_fetch(struct fetch_param *fprm,
+static nokprobe_inline void call_fetch(struct fetch_param *fprm,
 				 struct pt_regs *regs, void *dest)
 {
 	return fprm->fn(regs, fprm->data, dest);
@@ -351,7 +350,7 @@
 extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
 
 /* Sum up total data length for dynamic arrays (strings) */
-static inline __kprobes int
+static nokprobe_inline int
 __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 {
 	int i, ret = 0;
@@ -367,7 +366,7 @@
 }
 
 /* Store the value of each argument */
-static inline __kprobes void
+static nokprobe_inline void
 store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 		 u8 *data, int maxlen)
 {
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c082a74..04fdb5d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -108,8 +108,8 @@
  * Uprobes-specific fetch functions
  */
 #define DEFINE_FETCH_stack(type)					\
-static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
-					  void *offset, void *dest)	\
+static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
+					 void *offset, void *dest)	\
 {									\
 	*(type *)dest = (type)get_user_stack_nth(regs,			\
 					      ((unsigned long)offset)); \
@@ -120,8 +120,8 @@
 #define fetch_stack_string_size	NULL
 
 #define DEFINE_FETCH_memory(type)					\
-static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
-						void *addr, void *dest) \
+static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
+					  void *addr, void *dest)	\
 {									\
 	type retval;							\
 	void __user *vaddr = (void __force __user *) addr;		\
@@ -136,8 +136,8 @@
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
  */
-static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
+					    void *addr, void *dest)
 {
 	long ret;
 	u32 rloc = *(u32 *)dest;
@@ -158,8 +158,8 @@
 	}
 }
 
-static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
-						      void *addr, void *dest)
+static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
+						 void *addr, void *dest)
 {
 	int len;
 	void __user *vaddr = (void __force __user *) addr;
@@ -184,8 +184,8 @@
 }
 
 #define DEFINE_FETCH_file_offset(type)					\
-static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\
-					void *offset, void *dest) 	\
+static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,	\
+					       void *offset, void *dest)\
 {									\
 	void *vaddr = (void *)translate_user_vaddr(offset);		\
 									\
@@ -1009,9 +1009,32 @@
 	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
 }
 
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		list_del(&event->hw.tp_list);
+		done = tu->filter.nr_systemwide ||
+			(event->hw.tp_target->flags & PF_EXITING) ||
+			uprobe_filter_event(tu, event);
+	} else {
+		tu->filter.nr_systemwide--;
+		done = tu->filter.nr_systemwide;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
 {
 	bool done;
+	int err;
 
 	write_lock(&tu->filter.rwlock);
 	if (event->hw.tp_target) {
@@ -1033,32 +1056,13 @@
 	}
 	write_unlock(&tu->filter.rwlock);
 
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
-
-	return 0;
-}
-
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
-{
-	bool done;
-
-	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
-		list_del(&event->hw.tp_list);
-		done = tu->filter.nr_systemwide ||
-			(event->hw.tp_target->flags & PF_EXITING) ||
-			uprobe_filter_event(tu, event);
-	} else {
-		tu->filter.nr_systemwide--;
-		done = tu->filter.nr_systemwide;
+	err = 0;
+	if (!done) {
+		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+		if (err)
+			uprobe_perf_close(tu, event);
 	}
-	write_unlock(&tu->filter.rwlock);
-
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
-	return 0;
+	return err;
 }
 
 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
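
The reordering in trace_uprobe.c above exists so uprobe_perf_open() can call uprobe_perf_close() to undo its filter bookkeeping when uprobe_apply() fails, instead of silently dropping the error. The shape of that open-with-rollback pattern, reduced to a compilable toy (all names invented):

    #include <stdio.h>

    static int registered, applied;

    static int apply(int fail) { return fail ? -1 : (applied = 1, 0); }

    static void do_close(void)
    {
        registered = 0;          /* undo the bookkeeping */
        applied = 0;
    }

    static int do_open(int fail)
    {
        int err;

        registered = 1;          /* bookkeeping first, as under filter.rwlock */
        err = apply(fail);       /* the step that can fail */
        if (err)
            do_close();          /* roll back: a failed open leaves no state */
        return err;
    }

    int main(void)
    {
        printf("err=%d registered=%d\n", do_open(1), registered); /* -1 0 */
        return 0;
    }
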
diff --git a/mm/page_io.c b/mm/page_io.c
index 243a9b7..955db8b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -274,8 +274,8 @@
 			.count = PAGE_SIZE,
 			.iov_offset = 0,
 			.nr_segs = 1,
-			.bvec = &bv
 		};
+		from.bvec = &bv;	/* older gcc versions are broken */
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
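
The page_io.c workaround above moves the .bvec designated initializer out of the brace initializer because, as the added comment says, some older gcc releases mishandle designated initialization of a member that lives in an anonymous union (which is where iov_iter keeps its bvec/iov pointers). The general shape, on a toy struct rather than the real iov_iter:

    struct bvec { int page; };

    struct iter {
        unsigned long count;
        union {                     /* anonymous union, as in iov_iter */
            const struct bvec *bvec;
            const void *iov;
        };
    };

    int main(void)
    {
        static const struct bvec bv = { 1 };

        struct iter from = {
            .count = 4096,          /* plain members in the literal... */
        };
        from.bvec = &bv;            /* ...union member assigned separately */

        return from.bvec == &bv ? 0 : 1;
    }
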
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721..1675021 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@
 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
 	case CEPH_MSG_STATFS: return "statfs";
 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 10421a4..d1a62c6 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -126,9 +126,13 @@
 		req = rb_entry(rp, struct ceph_mon_generic_request, node);
 		op = le16_to_cpu(req->request->hdr.type);
 		if (op == CEPH_MSG_STATFS)
-			seq_printf(s, "%lld statfs\n", req->tid);
+			seq_printf(s, "%llu statfs\n", req->tid);
+		else if (op == CEPH_MSG_POOLOP)
+			seq_printf(s, "%llu poolop\n", req->tid);
+		else if (op == CEPH_MSG_MON_GET_VERSION)
+			seq_printf(s, "%llu mon_get_version\n", req->tid);
 		else
-			seq_printf(s, "%lld unknown\n", req->tid);
+			seq_printf(s, "%llu unknown\n", req->tid);
 	}
 
 	mutex_unlock(&monc->mutex);
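
Note also the %lld to %llu switch above: req->tid is unsigned, and the signed format renders a tid with the top bit set as a negative number. A two-line demonstration of what the old format effectively printed:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long tid = 0x8000000000000001ULL;

        printf("%lld\n", (long long)tid); /* -9223372036854775807 */
        printf("%llu\n", tid);            /*  9223372036854775809 */
        return 0;
    }
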
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef3..067d3af 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+			  unsigned long timeout)
+{
+	unsigned long started = jiffies;
+	int ret;
+
+	mutex_lock(&monc->mutex);
+	while (monc->have_osdmap < epoch) {
+		mutex_unlock(&monc->mutex);
+
+		if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+			return -ETIMEDOUT;
+
+		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+					 monc->have_osdmap >= epoch, timeout);
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&monc->mutex);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
 
 /*
  *
@@ -477,14 +504,13 @@
 	return m;
 }
 
-static int do_generic_request(struct ceph_mon_client *monc,
-			      struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+				struct ceph_mon_generic_request *req)
 {
 	int err;
 
 	/* register request */
-	mutex_lock(&monc->mutex);
-	req->tid = ++monc->last_tid;
+	req->tid = tid != 0 ? tid : ++monc->last_tid;
 	req->request->hdr.tid = cpu_to_le64(req->tid);
 	__insert_generic_request(monc, req);
 	monc->num_generic_requests++;
@@ -496,13 +522,24 @@
 	mutex_lock(&monc->mutex);
 	rb_erase(&req->node, &monc->generic_request_tree);
 	monc->num_generic_requests--;
-	mutex_unlock(&monc->mutex);
 
 	if (!err)
 		err = req->result;
 	return err;
 }
 
+static int do_generic_request(struct ceph_mon_client *monc,
+			      struct ceph_mon_generic_request *req)
+{
+	int err;
+
+	mutex_lock(&monc->mutex);
+	err = __do_generic_request(monc, 0, req);
+	mutex_unlock(&monc->mutex);
+
+	return err;
+}
+
 /*
  * statfs
  */
@@ -579,6 +616,96 @@
 }
 EXPORT_SYMBOL(ceph_monc_do_statfs);
 
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+				     struct ceph_msg *msg)
+{
+	struct ceph_mon_generic_request *req;
+	u64 tid = le64_to_cpu(msg->hdr.tid);
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front_alloc_len;
+	u64 handle;
+
+	dout("%s %p tid %llu\n", __func__, msg, tid);
+
+	ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+	handle = ceph_decode_64(&p);
+	if (tid != 0 && tid != handle)
+		goto bad;
+
+	mutex_lock(&monc->mutex);
+	req = __lookup_generic_req(monc, handle);
+	if (req) {
+		*(u64 *)req->buf = ceph_decode_64(&p);
+		req->result = 0;
+		get_generic_request(req);
+	}
+	mutex_unlock(&monc->mutex);
+	if (req) {
+		complete_all(&req->completion);
+		put_generic_request(req);
+	}
+
+	return;
+bad:
+	pr_err("corrupt mon_get_version reply\n");
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+			     u64 *newest)
+{
+	struct ceph_mon_generic_request *req;
+	void *p, *end;
+	u64 tid;
+	int err;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
+	kref_init(&req->kref);
+	req->buf = newest;
+	req->buf_len = sizeof(*newest);
+	init_completion(&req->completion);
+
+	req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+				    sizeof(u64) + sizeof(u32) + strlen(what),
+				    GFP_NOFS, true);
+	if (!req->request) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+				  GFP_NOFS, true);
+	if (!req->reply) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	p = req->request->front.iov_base;
+	end = p + req->request->front_alloc_len;
+
+	/* fill out request */
+	mutex_lock(&monc->mutex);
+	tid = ++monc->last_tid;
+	ceph_encode_64(&p, tid); /* handle */
+	ceph_encode_string(&p, end, what, strlen(what));
+
+	err = __do_generic_request(monc, tid, req);
+
+	mutex_unlock(&monc->mutex);
+out:
+	kref_put(&req->kref, release_generic_request);
+	return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
 /*
  * pool ops
  */
@@ -981,6 +1108,10 @@
 		handle_statfs_reply(monc, msg);
 		break;
 
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		handle_get_version_reply(monc, msg);
+		break;
+
 	case CEPH_MSG_POOLOP_REPLY:
 		handle_poolop_reply(monc, msg);
 		break;
@@ -1029,6 +1160,15 @@
 	case CEPH_MSG_AUTH_REPLY:
 		m = ceph_msg_get(monc->m_auth_reply);
 		break;
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		if (le64_to_cpu(hdr->tid) != 0)
+			return get_generic_reply(con, hdr, skip);
+
+		/*
+		 * Older OSDs don't set reply tid even if the original
+		 * request had a non-zero tid.  Work around this weirdness
+		 * by falling through to the allocate case.
+		 */
 	case CEPH_MSG_MON_MAP:
 	case CEPH_MSG_MDS_MAP:
 	case CEPH_MSG_OSD_MAP:
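
ceph_monc_wait_osdmap() above bounds the wait with time_after_eq(jiffies, started + timeout). The kernel's time_after_eq() funnels the comparison through a signed subtraction so it keeps working when the jiffies counter wraps, which a naive >= does not. A standalone rendering (macro core as in <linux/jiffies.h>, types simplified to unsigned long):

    #include <stdio.h>

    #define time_after_eq(a, b)  ((long)((a) - (b)) >= 0)

    int main(void)
    {
        unsigned long started  = (unsigned long)-10; /* 10 ticks before wrap */
        unsigned long deadline = started + 5;        /* still pre-wrap */
        unsigned long now      = 2;                  /* 12 ticks later, post-wrap */

        printf("naive:         %d\n", now >= deadline);              /* 0: wrong */
        printf("time_after_eq: %d\n", time_after_eq(now, deadline)); /* 1: right */
        return 0;
    }
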
diff --git a/scripts/Makefile b/scripts/Makefile
index 1d07860..890df5c 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -39,4 +39,4 @@
 subdir-$(CONFIG_DTC)         += dtc
 
 # Let clean descend into subdirs
-subdir-	+= basic kconfig package selinux
+subdir-	+= basic kconfig package
diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index d17e0ea..045e0098 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic
@@ -21,4 +21,3 @@
 
 $(obj)/%.h:
 	$(call cmd,wrap)
-
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 003bc26..bf3e677 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -50,67 +50,6 @@
         endif
 endif
 
-#
-# make W=... settings
-#
-# W=1 - warnings that may be relevant and does not occur too often
-# W=2 - warnings that occur quite often but may still be relevant
-# W=3 - the more obscure warnings, can most likely be ignored
-#
-# $(call cc-option, -W...) handles gcc -W.. options which
-# are not supported by all versions of the compiler
-ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
-warning-  := $(empty)
-
-warning-1 := -Wextra -Wunused -Wno-unused-parameter
-warning-1 += -Wmissing-declarations
-warning-1 += -Wmissing-format-attribute
-warning-1 += $(call cc-option, -Wmissing-prototypes)
-warning-1 += -Wold-style-definition
-warning-1 += $(call cc-option, -Wmissing-include-dirs)
-warning-1 += $(call cc-option, -Wunused-but-set-variable)
-warning-1 += $(call cc-disable-warning, missing-field-initializers)
-
-# Clang
-warning-1 += $(call cc-disable-warning, initializer-overrides)
-warning-1 += $(call cc-disable-warning, unused-value)
-warning-1 += $(call cc-disable-warning, format)
-warning-1 += $(call cc-disable-warning, unknown-warning-option)
-warning-1 += $(call cc-disable-warning, sign-compare)
-warning-1 += $(call cc-disable-warning, format-zero-length)
-warning-1 += $(call cc-disable-warning, uninitialized)
-warning-1 += $(call cc-option, -fcatch-undefined-behavior)
-
-warning-2 := -Waggregate-return
-warning-2 += -Wcast-align
-warning-2 += -Wdisabled-optimization
-warning-2 += -Wnested-externs
-warning-2 += -Wshadow
-warning-2 += $(call cc-option, -Wlogical-op)
-warning-2 += $(call cc-option, -Wmissing-field-initializers)
-
-warning-3 := -Wbad-function-cast
-warning-3 += -Wcast-qual
-warning-3 += -Wconversion
-warning-3 += -Wpacked
-warning-3 += -Wpadded
-warning-3 += -Wpointer-arith
-warning-3 += -Wredundant-decls
-warning-3 += -Wswitch-default
-warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
-warning-3 += $(call cc-option, -Wvla)
-
-warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
-
-ifeq ("$(strip $(warning))","")
-        $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
-endif
-
-KBUILD_CFLAGS += $(warning)
-endif
-
 include scripts/Makefile.lib
 
 ifdef host-progs
@@ -342,7 +281,7 @@
 $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
 
 quiet_cmd_as_s_S = CPP $(quiet_modtag) $@
-cmd_as_s_S       = $(CPP) $(a_flags)   -o $@ $< 
+cmd_as_s_S       = $(CPP) $(a_flags)   -o $@ $<
 
 $(obj)/%.s: $(src)/%.S FORCE
 	$(call if_changed_dep,as_s_S)
@@ -436,7 +375,7 @@
 $(filter $(addprefix $(obj)/,         \
 $($(subst $(obj)/,,$(@:.o=-objs)))    \
 $($(subst $(obj)/,,$(@:.o=-y)))), $^)
- 
+
 quiet_cmd_link_multi-y = LD      $@
 cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
 
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
new file mode 100644
index 0000000..6564350
--- /dev/null
+++ b/scripts/Makefile.extrawarn
@@ -0,0 +1,67 @@
+# ==========================================================================
+#
+# make W=... settings
+#
+# W=1 - warnings that may be relevant and do not occur too often
+# W=2 - warnings that occur quite often but may still be relevant
+# W=3 - the more obscure warnings, can most likely be ignored
+#
+# $(call cc-option, -W...) handles gcc -W.. options which
+# are not supported by all versions of the compiler
+# ==========================================================================
+
+ifeq ("$(origin W)", "command line")
+  export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W)
+endif
+
+ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS
+warning-  := $(empty)
+
+warning-1 := -Wextra -Wunused -Wno-unused-parameter
+warning-1 += -Wmissing-declarations
+warning-1 += -Wmissing-format-attribute
+warning-1 += $(call cc-option, -Wmissing-prototypes)
+warning-1 += -Wold-style-definition
+warning-1 += $(call cc-option, -Wmissing-include-dirs)
+warning-1 += $(call cc-option, -Wunused-but-set-variable)
+warning-1 += $(call cc-disable-warning, missing-field-initializers)
+
+# Clang
+warning-1 += $(call cc-disable-warning, initializer-overrides)
+warning-1 += $(call cc-disable-warning, unused-value)
+warning-1 += $(call cc-disable-warning, format)
+warning-1 += $(call cc-disable-warning, unknown-warning-option)
+warning-1 += $(call cc-disable-warning, sign-compare)
+warning-1 += $(call cc-disable-warning, format-zero-length)
+warning-1 += $(call cc-disable-warning, uninitialized)
+warning-1 += $(call cc-option, -fcatch-undefined-behavior)
+
+warning-2 := -Waggregate-return
+warning-2 += -Wcast-align
+warning-2 += -Wdisabled-optimization
+warning-2 += -Wnested-externs
+warning-2 += -Wshadow
+warning-2 += $(call cc-option, -Wlogical-op)
+warning-2 += $(call cc-option, -Wmissing-field-initializers)
+
+warning-3 := -Wbad-function-cast
+warning-3 += -Wcast-qual
+warning-3 += -Wconversion
+warning-3 += -Wpacked
+warning-3 += -Wpadded
+warning-3 += -Wpointer-arith
+warning-3 += -Wredundant-decls
+warning-3 += -Wswitch-default
+warning-3 += $(call cc-option, -Wpacked-bitfield-compat)
+warning-3 += $(call cc-option, -Wvla)
+
+warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)))
+
+ifeq ("$(strip $(warning))","")
+        $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown)
+endif
+
+KBUILD_CFLAGS += $(warning)
+endif
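
scripts/Makefile.extrawarn above only relocates the W= machinery out of Makefile.build; the warning sets themselves are unchanged. As a reminder of what the W=1 group catches, -Wunused-but-set-variable fires on code like this (hypothetical function):

    /* gcc -Wunused-but-set-variable -c example.c */
    int f(int x)
    {
        int dbg;        /* flagged: set but never used */

        dbg = x * 2;    /* written here... */
        return x + 1;   /* ...but the value is never read */
    }
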
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index 4d908d1..d8e335e 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -18,31 +18,29 @@
 include scripts/Makefile.host
 
 mod-fw := $(fw-shipped-m)
-# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the 
+# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the
 # firmware for in-kernel drivers too.
 ifndef CONFIG_FIRMWARE_IN_KERNEL
 mod-fw += $(fw-shipped-y)
 endif
 
+ifneq ($(KBUILD_SRC),)
+# Create output directory if not already present
+_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj))
+
+firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all))))
+# Create directories for firmware in subdirectories
+_dummy := $(foreach d,$(firmware-dirs), $(shell [ -d $(d) ] || mkdir -p $(d)))
+endif
+
 installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
 
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
-installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./
-
-# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
-PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
-$(INSTALL_FW_PATH)/$$(%): install-all-dirs
-	@true
-install-all-dirs: $(installed-fw-dirs)
-	@true
 
 quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
-      cmd_install = $(INSTALL) -m0644 $< $@
+      cmd_install = mkdir -p $(@D); $(INSTALL) -m0644 $< $@
 
-$(installed-fw-dirs):
-	$(call cmd,mkdir)
-
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/%
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 1ac414f..6689364 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -166,5 +166,4 @@
 	$(call if_changed,host-cshlib)
 
 targets += $(host-csingle)  $(host-cmulti) $(host-cobjs)\
-	   $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs) 
-
+	   $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6a5b0de..260bf8a 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -27,7 +27,7 @@
 # ---------------------------------------------------------------------------
 # o if we encounter foo/ in $(obj-y), replace it by foo/built-in.o
 #   and add the directory to the list of dirs to descend into: $(subdir-y)
-# o if we encounter foo/ in $(obj-m), remove it from $(obj-m) 
+# o if we encounter foo/ in $(obj-m), remove it from $(obj-m)
 #   and add the directory to the list of dirs to descend into: $(subdir-m)
 
 # Determine modorder.
@@ -46,7 +46,7 @@
 
 subdir-ym	:= $(sort $(subdir-y) $(subdir-m))
 
-# if $(foo-objs) exists, foo.o is a composite object 
+# if $(foo-objs) exists, foo.o is a composite object
 multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
 multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
 multi-used   := $(multi-used-y) $(multi-used-m)
@@ -91,7 +91,7 @@
 
 # These flags are needed for modversions and compiling, so we define them here
 # already
-# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will 
+# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will
 # end up in (or would, if it gets compiled in)
 # Note: Files that end up in two or more modules are compiled without the
 #       KBUILD_MODNAME definition. The reason is that any made-up name would
@@ -212,7 +212,7 @@
 
 # Commands useful for building a boot image
 # ===========================================================================
-# 
+#
 #	Use as following:
 #
 #	target: source(s) FORCE
@@ -226,7 +226,7 @@
 
 quiet_cmd_ld = LD      $@
 cmd_ld = $(LD) $(LDFLAGS) $(ldflags-y) $(LDFLAGS_$(@F)) \
-	       $(filter-out FORCE,$^) -o $@ 
+	       $(filter-out FORCE,$^) -o $@
 
 # Objcopy
 # ---------------------------------------------------------------------------
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 078fe1d..b304068 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -409,10 +409,10 @@
 		exit(2);
 	}
 	if (fstat(fd, &st) < 0) {
-                fprintf(stderr, "fixdep: error fstat'ing depfile: ");
-                perror(depfile);
-                exit(2);
-        }
+		fprintf(stderr, "fixdep: error fstat'ing depfile: ");
+		perror(depfile);
+		exit(2);
+	}
 	if (st.st_size == 0) {
 		fprintf(stderr,"fixdep: %s is empty\n",depfile);
 		close(fd);
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 544aa56..c05d586 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -173,4 +173,3 @@
 
 # Sort output by size (last field)
 print sort { ($b =~ /:\t*(\d+)$/)[0] <=> ($a =~ /:\t*(\d+)$/)[0] } @stack;
-
diff --git a/scripts/coccinelle/misc/of_table.cocci b/scripts/coccinelle/misc/of_table.cocci
new file mode 100644
index 0000000..3c93404
--- /dev/null
+++ b/scripts/coccinelle/misc/of_table.cocci
@@ -0,0 +1,62 @@
+/// Make sure of_device_id tables are NULL terminated
+//
+// Keywords: of_table
+// Confidence: Medium
+// Options: --include-headers
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@depends on context@
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+*	}
+};
+
+@depends on patch@
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+-	}
++	},
++	{ }
+};
+
+@r depends on org || report@
+position p1;
+identifier var, arr;
+expression E;
+@@
+struct of_device_id arr[] = {
+	...,
+	{
+	.var = E,
+	}
+	@p1
+};
+
+@script:python depends on org@
+p1 << r.p1;
+arr << r.arr;
+@@
+
+cocci.print_main(arr,p1)
+
+@script:python depends on report@
+p1 << r.p1;
+arr << r.arr;
+@@
+
+msg = "%s is not NULL terminated at line %s" % (arr, p1[0].line)
+coccilib.report.print_report(p1[0],msg)
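
The rule above enforces the sentinel entry that the OF match-table walkers (of_match_node() and friends) use as a terminator. In a driver, the fixed table looks like this (device names hypothetical):

    #include <linux/mod_devicetable.h>

    static const struct of_device_id foo_of_match[] = {
        { .compatible = "vendor,foo" },
        { .compatible = "vendor,foo-v2" },
        { }     /* sentinel: the walker stops at the all-zero entry */
    };
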
diff --git a/scripts/coccinelle/misc/returnvar.cocci b/scripts/coccinelle/misc/returnvar.cocci
new file mode 100644
index 0000000..605955a
--- /dev/null
+++ b/scripts/coccinelle/misc/returnvar.cocci
@@ -0,0 +1,66 @@
+///
+/// Removes unneeded variable used to store return value.
+///
+// Confidence: Moderate
+// Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6.  GPLv2.
+// URL: http://coccinelle.lip6.fr/
+// Comments: Comments on code can be deleted if near code that is removed.
+//           "when strict" can be removed to get more hits, but adds false
+//           positives
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual report
+virtual context
+virtual org
+
+@depends on patch@
+type T;
+constant C;
+identifier ret;
+@@
+- T ret = C;
+... when != ret
+    when strict
+return
+- ret
++ C
+;
+
+@depends on context@
+type T;
+constant C;
+identifier ret;
+@@
+* T ret = C;
+... when != ret
+    when strict
+* return ret;
+
+@r1 depends on report || org@
+type T;
+constant C;
+identifier ret;
+position p1, p2;
+@@
+T ret@p1 = C;
+... when != ret
+    when strict
+return ret@p2;
+
+@script:python depends on report@
+p1 << r1.p1;
+p2 << r1.p2;
+C << r1.C;
+ret << r1.ret;
+@@
+coccilib.report.print_report(p1[0], "Unneeded variable: \"" + ret + "\". Return \"" + C + "\" on line " + p2[0].line)
+
+@script:python depends on org@
+p1 << r1.p1;
+p2 << r1.p2;
+C << r1.C;
+ret << r1.ret;
+@@
+cocci.print_main("unneeded \"" + ret + "\" variable", p1)
+cocci.print_sec("return " + C + " here", p2)
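
Applied to a hypothetical function, the patch rule performs the rewrite below; note the 'when != ret' constraint, which restricts it to variables that are never touched between the initialization and the return:

	static void do_setup(void) { /* hypothetical side effect */ }

	/* before: 'ret' only ever holds the constant it was initialized with */
	int probe_before(void)
	{
		int ret = 0;

		do_setup();
		return ret;
	}

	/* after the semantic patch: the variable is gone */
	int probe_after(void)
	{
		do_setup();
		return 0;
	}
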
diff --git a/scripts/config b/scripts/config
index 6804179..026aeb4 100755
--- a/scripts/config
+++ b/scripts/config
@@ -223,4 +223,3 @@
 		;;
 	esac
 done
-
diff --git a/scripts/conmakehash.c b/scripts/conmakehash.c
index 263a44d..61bbda5 100644
--- a/scripts/conmakehash.c
+++ b/scripts/conmakehash.c
@@ -104,7 +104,7 @@
 	}
     }
 
-  /* For now we assume the default font is always 256 characters. */    
+  /* For now we assume the default font is always 256 characters. */
   fontlen = 256;
 
   /* Initialize table */
@@ -236,15 +236,15 @@
     }
 
   /* Okay, we hit EOF, now output hash table */
-  
+
   fclose(ctbl);
-  
+
 
   /* Compute total size of Unicode list */
   nuni = 0;
   for ( i = 0 ; i < fontlen ; i++ )
     nuni += unicount[i];
-  
+
   printf("\
 /*\n\
  * Do not edit this file; it was automatically generated by\n\
@@ -268,9 +268,9 @@
       else
         printf(", ");
     }
-  
+
   printf("\nu16 dfont_unitable[%d] = \n{\n\t", nuni);
-  
+
   fp0 = 0;
   nent = 0;
   for ( i = 0 ; i < nuni ; i++ )
diff --git a/scripts/docproc.c b/scripts/docproc.c
index 2b69eaf..e267e621 100644
--- a/scripts/docproc.c
+++ b/scripts/docproc.c
@@ -154,7 +154,7 @@
 static void add_new_symbol(struct symfile *sym, char * symname)
 {
 	sym->symbollist =
-          realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
+	  realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *));
 	sym->symbollist[sym->symbolcnt++].name = strdup(symname);
 }
 
@@ -215,7 +215,7 @@
 			char *p;
 			char *e;
 			if (((p = strstr(line, "EXPORT_SYMBOL_GPL")) != NULL) ||
-                            ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) {
+			    ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) {
 				/* Skip EXPORT_SYMBOL{_GPL} */
 				while (isalnum(*p) || *p == '_')
 					p++;
@@ -291,28 +291,28 @@
 static void singfunc(char * filename, char * line)
 {
 	char *vec[200]; /* Enough for specific functions */
-        int i, idx = 0;
-        int startofsym = 1;
+	int i, idx = 0;
+	int startofsym = 1;
 	vec[idx++] = KERNELDOC;
 	vec[idx++] = DOCBOOK;
 	vec[idx++] = SHOWNOTFOUND;
 
-        /* Split line up in individual parameters preceded by FUNCTION */
-        for (i=0; line[i]; i++) {
-                if (isspace(line[i])) {
-                        line[i] = '\0';
-                        startofsym = 1;
-                        continue;
-                }
-                if (startofsym) {
-                        startofsym = 0;
-                        vec[idx++] = FUNCTION;
-                        vec[idx++] = &line[i];
-                }
-        }
+	/* Split line up in individual parameters preceded by FUNCTION */
+	for (i=0; line[i]; i++) {
+		if (isspace(line[i])) {
+			line[i] = '\0';
+			startofsym = 1;
+			continue;
+		}
+		if (startofsym) {
+			startofsym = 0;
+			vec[idx++] = FUNCTION;
+			vec[idx++] = &line[i];
+		}
+	}
 	for (i = 0; i < idx; i++) {
-        	if (strcmp(vec[i], FUNCTION))
-        		continue;
+		if (strcmp(vec[i], FUNCTION))
+			continue;
 		consume_symbol(vec[i + 1]);
 	}
 	vec[idx++] = filename;
@@ -460,14 +460,14 @@
 					break;
 				case 'D':
 					while (*s && !isspace(*s)) s++;
-                                        *s = '\0';
-                                        symbolsonly(line+2);
-                                        break;
+					*s = '\0';
+					symbolsonly(line+2);
+					break;
 				case 'F':
 					/* filename */
 					while (*s && !isspace(*s)) s++;
 					*s++ = '\0';
-                                        /* function names */
+					/* function names */
 					while (isspace(*s))
 						s++;
 					singlefunctions(line +2, s);
@@ -515,11 +515,11 @@
 	}
 	/* Open file, exit on error */
 	infile = fopen(argv[2], "r");
-        if (infile == NULL) {
-                fprintf(stderr, "docproc: ");
-                perror(argv[2]);
-                exit(2);
-        }
+	if (infile == NULL) {
+		fprintf(stderr, "docproc: ");
+		perror(argv[2]);
+		exit(2);
+	}
 
 	if (strcmp("doc", argv[1]) == 0) {
 		/* Need to do this in two passes.
diff --git a/scripts/dtc/.gitignore b/scripts/dtc/.gitignore
index 095acb4..cdabdc9 100644
--- a/scripts/dtc/.gitignore
+++ b/scripts/dtc/.gitignore
@@ -2,4 +2,3 @@
 dtc-lexer.lex.c
 dtc-parser.tab.c
 dtc-parser.tab.h
-
diff --git a/scripts/dtc/fstree.c b/scripts/dtc/fstree.c
index f377453..e464727 100644
--- a/scripts/dtc/fstree.c
+++ b/scripts/dtc/fstree.c
@@ -88,4 +88,3 @@
 
 	return build_boot_info(NULL, tree, guess_boot_cpuid(tree));
 }
-
diff --git a/scripts/dtc/libfdt/fdt_empty_tree.c b/scripts/dtc/libfdt/fdt_empty_tree.c
index f72d13b..f2ae9b7 100644
--- a/scripts/dtc/libfdt/fdt_empty_tree.c
+++ b/scripts/dtc/libfdt/fdt_empty_tree.c
@@ -81,4 +81,3 @@
 
 	return fdt_open_into(buf, buf, bufsize);
 }
-
diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c
index 33eeba5..5740e69 100644
--- a/scripts/dtc/treesource.c
+++ b/scripts/dtc/treesource.c
@@ -281,4 +281,3 @@
 
 	write_tree_source_node(f, bi->dt, 0);
 }
-
diff --git a/scripts/headers.sh b/scripts/headers.sh
index 978b42b..95ece06 100755
--- a/scripts/headers.sh
+++ b/scripts/headers.sh
@@ -28,5 +28,3 @@
 		;;
 	esac
 done
-
-
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 1237dd7..dc7aa45 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -123,7 +123,7 @@
 	}
 	if (strlen(str) > KSYM_NAME_LEN) {
 		fprintf(stderr, "Symbol %s too long for kallsyms (%zu vs %d).\n"
-                                "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
+				"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
 			str, strlen(str), KSYM_NAME_LEN);
 		return -1;
 	}
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 844bc9d..9c4d241 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -33,11 +33,11 @@
 	$< --$@ $(Kconfig)
 
 silentoldconfig: $(obj)/conf
-	$(Q)mkdir -p include/generated
+	$(Q)mkdir -p include/config include/generated
 	$< --$@ $(Kconfig)
 
 localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf
-	$(Q)mkdir -p include/generated
+	$(Q)mkdir -p include/config include/generated
 	$(Q)perl $< --$@ $(srctree) $(Kconfig) > .tmp.config
 	$(Q)if [ -f .config ]; then 					\
 			cmp -s .tmp.config .config ||			\
@@ -319,4 +319,3 @@
 $(obj)/gconf.glade.h: $(obj)/gconf.glade
 	$(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \
 	$(obj)/gconf.glade
-
diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh
index 854d9c7..55b79ba 100755
--- a/scripts/kconfig/check.sh
+++ b/scripts/kconfig/check.sh
@@ -11,4 +11,3 @@
 if [ ! "$?" -eq "0"  ]; then
 	echo -DKBUILD_NO_NLS;
 fi
-
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index d19944f..fef75fc 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -696,7 +696,7 @@
 	} else if (input_mode == savedefconfig) {
 		if (conf_write_defconfig(defconfig_file)) {
 			fprintf(stderr, _("n*** Error while saving defconfig to: %s\n\n"),
-			        defconfig_file);
+				defconfig_file);
 			return 1;
 		}
 	} else if (input_mode != listnewconfig) {
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index f2bee70..d0a35b2 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -1404,7 +1404,7 @@
 		    && (tree == tree2))
 			continue;
 /*
-                if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT))
+		if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT))
 		    || (view_mode == FULL_VIEW)
 		    || (view_mode == SPLIT_VIEW))*/
 
diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index 3b15c08..8d016fa 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c
@@ -168,13 +168,13 @@
 
 	/* create new window for the list */
 	list = subwin(dialog, list_height, list_width, y + box_y + 1,
-	              x + box_x + 1);
+		      x + box_x + 1);
 
 	keypad(list, TRUE);
 
 	/* draw a box around the list items */
 	draw_box(dialog, box_y, box_x, list_height + 2, list_width + 2,
-	         dlg.menubox_border.atr, dlg.menubox.atr);
+		 dlg.menubox_border.atr, dlg.menubox.atr);
 
 	/* Find length of longest item in order to center checklist */
 	check_x = 0;
diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index 447a582..d58de1d 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c
@@ -42,7 +42,7 @@
  * Display a dialog box for inputing a string
  */
 int dialog_inputbox(const char *title, const char *prompt, int height, int width,
-                    const char *init)
+		    const char *init)
 {
 	int i, x, y, box_y, box_x, box_width;
 	int input_x = 0, key = 0, button = -1;
diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c
index c93de0b..11ae9ad7 100644
--- a/scripts/kconfig/lxdialog/menubox.c
+++ b/scripts/kconfig/lxdialog/menubox.c
@@ -64,7 +64,7 @@
  * Print menu item
  */
 static void do_print_item(WINDOW * win, const char *item, int line_y,
-                          int selected, int hotkey)
+			  int selected, int hotkey)
 {
 	int j;
 	char *menu_item = malloc(menu_width + 1);
@@ -182,7 +182,7 @@
  * Display a menu for choosing among a number of options
  */
 int dialog_menu(const char *title, const char *prompt,
-                const void *selected, int *s_scroll)
+		const void *selected, int *s_scroll)
 {
 	int i, j, x, y, box_x, box_y;
 	int height, width, menu_height;
diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 58a8289..f7abdeb 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c
@@ -623,7 +623,7 @@
 void item_add_str(const char *fmt, ...)
 {
 	va_list ap;
-        size_t avail;
+	size_t avail;
 
 	avail = sizeof(item_cur->node.str) - strlen(item_cur->node.str);
 
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 59184bb..14cea74 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -299,7 +299,7 @@
 	int size;
 
 	size = snprintf(menu_backtitle, sizeof(menu_backtitle),
-	                "%s - %s", config_filename, rootmenu.prompt->text);
+			"%s - %s", config_filename, rootmenu.prompt->text);
 	if (size >= sizeof(menu_backtitle))
 		menu_backtitle[sizeof(menu_backtitle)-1] = '\0';
 	set_dialog_backtitle(menu_backtitle);
@@ -1034,4 +1034,3 @@
 
 	return res;
 }
-
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 3ac2c9c..a26cc5d 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -258,8 +258,8 @@
 				    "config symbol '%s' uses select, but is "
 				    "not boolean or tristate", sym->name);
 			else if (sym2->type != S_UNKNOWN &&
-			         sym2->type != S_BOOLEAN &&
-			         sym2->type != S_TRISTATE)
+				 sym2->type != S_BOOLEAN &&
+				 sym2->type != S_TRISTATE)
 				prop_warn(prop,
 				    "'%s' has wrong type. 'select' only "
 				    "accept arguments of boolean and "
@@ -268,7 +268,7 @@
 		case P_RANGE:
 			if (sym->type != S_INT && sym->type != S_HEX)
 				prop_warn(prop, "range is only allowed "
-				                "for int or hex symbols");
+						"for int or hex symbols");
 			if (!menu_validate_number(sym, prop->expr->left.sym) ||
 			    !menu_validate_number(sym, prop->expr->right.sym))
 				prop_warn(prop, "range is invalid");
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index 4fbecd2..984489e 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -1554,4 +1554,3 @@
 	endwin();
 	return 0;
 }
-
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index 3133172..9cb8522 100644
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -589,7 +589,7 @@
 
     # Now we need to see if we have to check selects;
     loop_select;
-}	    
+}
 
 my %setconfigs;
 
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index 6e7fbf1..94f9c83 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -155,5 +155,3 @@
 	fprintf(stderr, "Out of memory.\n");
 	exit(1);
 }
-
-
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
index 1a9f53e..6c62d93 100644
--- a/scripts/kconfig/zconf.l
+++ b/scripts/kconfig/zconf.l
@@ -27,8 +27,8 @@
 static int text_size, text_asize;
 
 struct buffer {
-        struct buffer *parent;
-        YY_BUFFER_STATE state;
+	struct buffer *parent;
+	YY_BUFFER_STATE state;
 };
 
 struct buffer *current_buf;
diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped
index a0521aa..349a7f2 100644
--- a/scripts/kconfig/zconf.lex.c_shipped
+++ b/scripts/kconfig/zconf.lex.c_shipped
@@ -789,8 +789,8 @@
 static int text_size, text_asize;
 
 struct buffer {
-        struct buffer *parent;
-        YY_BUFFER_STATE state;
+	struct buffer *parent;
+	YY_BUFFER_STATE state;
 };
 
 struct buffer *current_buf;
diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped
index 25ae16a..de5e84e 100644
--- a/scripts/kconfig/zconf.tab.c_shipped
+++ b/scripts/kconfig/zconf.tab.c_shipped
@@ -2314,7 +2314,7 @@
 	for_all_symbols(i, sym) {
 		if (sym_check_deps(sym))
 			zconfnerrs++;
-        }
+	}
 	if (zconfnerrs)
 		exit(1);
 	sym_set_change_count(1);
diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
index 0653886..0f683cf 100644
--- a/scripts/kconfig/zconf.y
+++ b/scripts/kconfig/zconf.y
@@ -510,7 +510,7 @@
 	for_all_symbols(i, sym) {
 		if (sym_check_deps(sym))
 			zconfnerrs++;
-        }
+	}
 	if (zconfnerrs)
 		exit(1);
 	sym_set_change_count(1);
diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
index 827896f..c21d163 100644
--- a/scripts/markup_oops.pl
+++ b/scripts/markup_oops.pl
@@ -367,4 +367,3 @@
 EOT
 	exit;
 }
-
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index cfb8440..6fdc97e 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -68,7 +68,7 @@
 
 ( echo /\* This file is auto generated, version $VERSION \*/
   if [ -n "$CONFIG_FLAGS" ] ; then echo "/* $CONFIG_FLAGS */"; fi
-  
+
   echo \#define UTS_MACHINE \"$ARCH\"
 
   echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\"
@@ -84,7 +84,7 @@
 # recompilations.
 # We don't consider the file changed if only the date/time changed.
 # A kernel config change will increase the generation number, thus
-# causing compile.h to be updated (including date/time) due to the 
+# causing compile.h to be updated (including date/time) due to the
 # changed comment in the
 # first line.
 
diff --git a/scripts/mkmakefile b/scripts/mkmakefile
index 0cc0442..84af27b 100644
--- a/scripts/mkmakefile
+++ b/scripts/mkmakefile
@@ -42,18 +42,11 @@
 
 MAKEFLAGS += --no-print-directory
 
-.PHONY: all \$(MAKECMDGOALS)
+.PHONY: __sub-make \$(MAKECMDGOALS)
 
-all	:= \$(filter-out all Makefile,\$(MAKECMDGOALS))
+__sub-make:
+	\$(Q)\$(MAKE) \$(MAKEARGS) \$(MAKECMDGOALS)
 
-all:
-	\$(Q)\$(MAKE) \$(MAKEARGS) \$(all)
-
-Makefile:;
-
-\$(all): all
-	@:
-
-%/: all
+\$(filter-out __sub-make, \$(MAKECMDGOALS)): __sub-make
 	@:
 EOF
diff --git a/scripts/mksysmap b/scripts/mksysmap
index c1b6191..7ada35a 100644
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -42,4 +42,3 @@
 # (At least sparc64 has __crc_ in the middle).
 
 $NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)' > $2
-
diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore
index 33bae0d..3bd11b6 100644
--- a/scripts/mod/.gitignore
+++ b/scripts/mod/.gitignore
@@ -2,4 +2,3 @@
 mk_elfconfig
 modpost
 devicetable-offsets.h
-
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 1924990..e614ef6 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -644,28 +644,26 @@
 
 static int do_of_entry (const char *filename, void *symval, char *alias)
 {
-    int len;
-    char *tmp;
-    DEF_FIELD_ADDR(symval, of_device_id, name);
-    DEF_FIELD_ADDR(symval, of_device_id, type);
-    DEF_FIELD_ADDR(symval, of_device_id, compatible);
+	int len;
+	char *tmp;
+	DEF_FIELD_ADDR(symval, of_device_id, name);
+	DEF_FIELD_ADDR(symval, of_device_id, type);
+	DEF_FIELD_ADDR(symval, of_device_id, compatible);
 
-    len = sprintf (alias, "of:N%sT%s",
-                    (*name)[0] ? *name : "*",
-                    (*type)[0] ? *type : "*");
+	len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*",
+		      (*type)[0] ? *type : "*");
 
-    if (compatible[0])
-        sprintf (&alias[len], "%sC%s",
-                     (*type)[0] ? "*" : "",
-                     *compatible);
+	if (compatible[0])
+		sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "",
+			*compatible);
 
-    /* Replace all whitespace with underscores */
-    for (tmp = alias; tmp && *tmp; tmp++)
-        if (isspace (*tmp))
-            *tmp = '_';
+	/* Replace all whitespace with underscores */
+	for (tmp = alias; tmp && *tmp; tmp++)
+		if (isspace (*tmp))
+			*tmp = '_';
 
-    add_wildcard(alias);
-    return 1;
+	add_wildcard(alias);
+	return 1;
 }
 ADD_TO_DEVTABLE("of", of_device_id, do_of_entry);
 
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index 639bca7..a4fd71d 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -54,4 +54,3 @@
 
 	return 0;
 }
-
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 026543b..9d9c5b9 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -862,7 +862,7 @@
  * without "ax" / "aw".
  */
 static void check_section(const char *modname, struct elf_info *elf,
-                          Elf_Shdr *sechdr)
+			  Elf_Shdr *sechdr)
 {
 	const char *sec = sech_name(elf, sechdr);
 
@@ -1296,12 +1296,12 @@
  */
 static void report_sec_mismatch(const char *modname,
 				const struct sectioncheck *mismatch,
-                                const char *fromsec,
-                                unsigned long long fromaddr,
-                                const char *fromsym,
-                                int from_is_func,
-                                const char *tosec, const char *tosym,
-                                int to_is_func)
+				const char *fromsec,
+				unsigned long long fromaddr,
+				const char *fromsym,
+				int from_is_func,
+				const char *tosec, const char *tosym,
+				int to_is_func)
 {
 	const char *from, *from_p;
 	const char *to, *to_p;
@@ -1441,7 +1441,7 @@
 }
 
 static void check_section_mismatch(const char *modname, struct elf_info *elf,
-                                   Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+				   Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
 {
 	const char *tosec;
 	const struct sectioncheck *mismatch;
@@ -1528,7 +1528,7 @@
 	case R_ARM_ABS32:
 		/* From ARM ABI: (S + A) | T */
 		r->r_addend = (int)(long)
-		              (elf->symtab_start + ELF_R_SYM(r->r_info));
+			      (elf->symtab_start + ELF_R_SYM(r->r_info));
 		break;
 	case R_ARM_PC24:
 	case R_ARM_CALL:
@@ -1538,8 +1538,8 @@
 	case R_ARM_THM_JUMP19:
 		/* From ARM ABI: ((S + A) | T) - P */
 		r->r_addend = (int)(long)(elf->hdr +
-		              sechdr->sh_offset +
-		              (r->r_offset - sechdr->sh_addr));
+			      sechdr->sh_offset +
+			      (r->r_offset - sechdr->sh_addr));
 		break;
 	default:
 		return 1;
@@ -1571,7 +1571,7 @@
 }
 
 static void section_rela(const char *modname, struct elf_info *elf,
-                         Elf_Shdr *sechdr)
+			 Elf_Shdr *sechdr)
 {
 	Elf_Sym  *sym;
 	Elf_Rela *rela;
@@ -1615,7 +1615,7 @@
 }
 
 static void section_rel(const char *modname, struct elf_info *elf,
-                        Elf_Shdr *sechdr)
+			Elf_Shdr *sechdr)
 {
 	Elf_Sym *sym;
 	Elf_Rel *rel;
@@ -1685,7 +1685,7 @@
  * be discarded and warns about it.
  **/
 static void check_sec_ref(struct module *mod, const char *modname,
-                          struct elf_info *elf)
+			  struct elf_info *elf)
 {
 	int i;
 	Elf_Shdr *sechdrs = elf->sechdrs;
@@ -1945,7 +1945,7 @@
 					     s->name, mod->name);
 				} else {
 					merror("\"%s\" [%s.ko] undefined!\n",
-					          s->name, mod->name);
+					       s->name, mod->name);
 					err = 1;
 				}
 			}
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index deb2994..944418d 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -214,7 +214,7 @@
 	mctx->block[14] = mctx->byte_count << 3;
 	mctx->block[15] = mctx->byte_count >> 29;
 	le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
-	                  sizeof(uint64_t)) / sizeof(uint32_t));
+			  sizeof(uint64_t)) / sizeof(uint32_t));
 	md4_transform(mctx->hash, mctx->block);
 	cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t));
 
@@ -367,7 +367,7 @@
 			break;
 		/* Terminate line at first space, to get rid of final ' \' */
 		while (*p) {
-                       if (isspace(*p)) {
+			if (isspace(*p)) {
 				*p = '\0';
 				break;
 			}
diff --git a/scripts/objdiff b/scripts/objdiff
index b3e4f10..62e51da 100755
--- a/scripts/objdiff
+++ b/scripts/objdiff
@@ -25,25 +25,47 @@
 #
 # Note: 'make mrproper' will also remove .tmp_objdiff
 
-GIT_DIR="`git rev-parse --git-dir`"
+SRCTREE=$(cd $(git rev-parse --show-toplevel 2>/dev/null); pwd)
 
-if [ -d "$GIT_DIR" ]; then
-	TMPD="${GIT_DIR%git}tmp_objdiff"
-
-	[ -d "$TMPD" ] || mkdir "$TMPD"
-else
-	echo "ERROR: git directory not found."
+if [ -z "$SRCTREE" ]; then
+	echo >&2 "ERROR: Not a git repository."
 	exit 1
 fi
 
+TMPD=$SRCTREE/.tmp_objdiff
+
 usage() {
-	echo "Usage: $0 <command> <args>"
-	echo "  record    <list of object files>"
-	echo "  diff      <commitA> <commitB>"
-	echo "  clean     all | <commit>"
+	echo >&2 "Usage: $0 <command> <args>"
+	echo >&2 "  record    <list of object files or directories>"
+	echo >&2 "  diff      <commitA> <commitB>"
+	echo >&2 "  clean     all | <commit>"
 	exit 1
 }
 
+get_output_dir() {
+	dir=${1%/*}
+
+	if [ "$dir" = "$1" ]; then
+		dir=.
+	fi
+
+	dir=$(cd $dir; pwd)
+
+	echo $TMPD/$CMT${dir#$SRCTREE}
+}
+
+do_objdump() {
+	dir=$(get_output_dir $1)
+	base=${1##*/}
+	dis=$dir/${base%.o}.dis
+
+	[ ! -d "$dir" ] && mkdir -p $dir
+
+	# remove addresses for a cleaner diff
+	# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
+	$OBJDUMP -D $1 | sed "s/^[[:space:]]\+[0-9a-f]\+//" > $dis
+}
+
 dorecord() {
 	[ $# -eq 0 ] && usage
 
@@ -52,20 +74,16 @@
 	CMT="`git rev-parse --short HEAD`"
 
 	OBJDUMP="${CROSS_COMPILE}objdump"
-	OBJDIFFD="$TMPD/$CMT"
 
-	[ ! -d "$OBJDIFFD" ] && mkdir -p "$OBJDIFFD"
-
-	for f in $FILES; do
-		dn="${f%/*}"
-		bn="${f##*/}"
-
-		[ ! -d "$OBJDIFFD/$dn" ] && mkdir -p "$OBJDIFFD/$dn"
-
-		# remove addresses for a more clear diff
-		# http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and
-		$OBJDUMP -D "$f" | sed "s/^[[:space:]]\+[0-9a-f]\+//" \
-			>"$OBJDIFFD/$dn/$bn"
+	for d in $FILES; do
+		if [ -d "$d" ]; then
+			for f in $(find $d -name '*.o')
+			do
+				do_objdump $f
+			done
+		else
+			do_objdump $d
+		fi
 	done
 }
 
@@ -90,12 +108,12 @@
 	DSTD="$TMPD/$DST"
 
 	if [ ! -d "$SRCD" ]; then
-		echo "ERROR: $SRCD doesn't exist"
+		echo >&2 "ERROR: $SRCD doesn't exist"
 		exit 1
 	fi
 
 	if [ ! -d "$DSTD" ]; then
-		echo "ERROR: $DSTD doesn't exist"
+		echo >&2 "ERROR: $DSTD doesn't exist"
 		exit 1
 	fi
 
@@ -114,7 +132,7 @@
 		if [ -d "$TMPD/$CMT" ]; then
 			rm -rf $TMPD/$CMT
 		else
-			echo "$CMT not found"
+			echo >&2 "$CMT not found"
 		fi
 	fi
 }
@@ -135,7 +153,7 @@
 		doclean $*
 		;;
 	*)
-		echo "Unrecognized command '$1'"
+		echo >&2 "Unrecognized command '$1'"
 		exit 1
 		;;
 esac
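
With the directory support added above, a typical round trip (paths hypothetical) is: build at the first commit and run './scripts/objdiff record drivers/ata/', rebuild at the second commit and record again, then run './scripts/objdiff diff <commitA> <commitB>'. Each recording lands under .tmp_objdiff/<short-sha>/, mirroring the source tree layout as computed by get_output_dir().
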
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index c5d4733..99ca6e7 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -143,4 +143,3 @@
 	@echo '  perf-targz-src-pkg  - Build $(perf-tar).tar.gz source tarball'
 	@echo '  perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball'
 	@echo '  perf-tarxz-src-pkg  - Build $(perf-tar).tar.xz source tarball'
-
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index f46e4dd..b5f08f7 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -35,13 +35,15 @@
 	sparc*)
 		debarch=sparc ;;
 	s390*)
-		debarch=s390 ;;
+		debarch=s390$(grep -q CONFIG_64BIT=y $KCONFIG_CONFIG && echo x || true) ;;
 	ppc*)
 		debarch=powerpc ;;
 	parisc*)
 		debarch=hppa ;;
 	mips*)
 		debarch=mips$(grep -q CPU_LITTLE_ENDIAN=y $KCONFIG_CONFIG && echo el || true) ;;
+	arm64)
+		debarch=arm64 ;;
 	arm*)
 		debarch=arm$(grep -q CONFIG_AEABI=y $KCONFIG_CONFIG && echo el || true) ;;
 	*)
@@ -130,7 +132,7 @@
 	cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map"
 	cp $KCONFIG_CONFIG "$tmpdir/usr/share/doc/$packagename/config"
 	gzip "$tmpdir/usr/share/doc/$packagename/config"
-else 
+else
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
 fi
@@ -155,11 +157,11 @@
 			for module in $(find lib/modules/ -name *.ko); do
 				mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module)
 				# only keep debug symbols in the debug file
-				objcopy --only-keep-debug $module $dbg_dir/usr/lib/debug/$module
+				$OBJCOPY --only-keep-debug $module $dbg_dir/usr/lib/debug/$module
 				# strip original module from debug symbols
-				objcopy --strip-debug $module
+				$OBJCOPY --strip-debug $module
 				# then add a link to those
-				objcopy --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module
+				$OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module
 			done
 		)
 	fi
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index aa22f94..995c1ea 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -136,4 +136,3 @@
 echo "Tarball successfully created in ${tarball}${file_ext}"
 
 exit 0
-
diff --git a/scripts/patch-kernel b/scripts/patch-kernel
index d000ea3..49b4241 100755
--- a/scripts/patch-kernel
+++ b/scripts/patch-kernel
@@ -27,7 +27,7 @@
 #       Nick Holloway <Nick.Holloway@alfie.demon.co.uk>, 2nd January 1995.
 #
 # Added support for handling multiple types of compression. What includes
-# gzip, bzip, bzip2, zip, compress, and plaintext. 
+# gzip, bzip, bzip2, zip, compress, and plaintext.
 #
 #       Adam Sulmicki <adam@cfar.umd.edu>, 1st January 1997.
 #
@@ -159,7 +159,7 @@
   fi
   # Remove backup files
   find $sourcedir/ '(' -name '*.orig' -o -name '.*.orig' ')' -exec rm -f {} \;
- 
+
   return 0;
 }
 
diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c
index 68bb4ef..4718d78 100644
--- a/scripts/pnmtologo.c
+++ b/scripts/pnmtologo.c
@@ -512,4 +512,3 @@
     }
     exit(0);
 }
-
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index e11aa4a..650ecc8 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -487,5 +487,3 @@
 	}
 	return !!n_error;
 }
-
-
diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh
index 43098af..6b5c83b 100644
--- a/scripts/rt-tester/check-all.sh
+++ b/scripts/rt-tester/check-all.sh
@@ -19,4 +19,3 @@
 testit t4-l2-pi-deboost.tst
 testit t5-l4-pi-boost-deboost.tst
 testit t5-l4-pi-boost-deboost-setsched.tst
-
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 34186ca..6d916c2 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -216,5 +216,3 @@
 # Normal exit pass
 print "Pass"
 sys.exit(0)
-
-
diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh
index 7b9ccf6..f6a0ce7 100644
--- a/scripts/selinux/install_policy.sh
+++ b/scripts/selinux/install_policy.sh
@@ -66,4 +66,3 @@
 	$SF file_contexts /dev
 	mount --move /mnt /dev
 fi
-
diff --git a/scripts/show_delta b/scripts/show_delta
index e25732b..5b36500 100755
--- a/scripts/show_delta
+++ b/scripts/show_delta
@@ -126,4 +126,3 @@
 		print (convert_line(line, base_time),)
 
 main()
-
diff --git a/scripts/tags.sh b/scripts/tags.sh
index f2c5b00..e6b011f 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -25,6 +25,9 @@
 	tree=${srctree}/
 fi
 
+# ignore userspace tools
+ignore="$ignore ( -path ${tree}tools ) -prune -o"
+
 # Find all available archs
 find_all_archs()
 {
@@ -47,7 +50,8 @@
 	for i in $archincludedir; do
 		prune="$prune -wholename $i -prune -o"
 	done
-	find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" -print;
+	find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" \
+		-not -type l -print;
 }
 
 # find sources in arch/$1/include
@@ -57,14 +61,15 @@
 					-name include -type d -print);
 	if [ -n "$include" ]; then
 		archincludedir="$archincludedir $include"
-		find $include $ignore -name "$2" -print;
+		find $include $ignore -name "$2" -not -type l -print;
 	fi
 }
 
 # find sources in include/
 find_include_sources()
 {
-	find ${tree}include $ignore -name config -prune -o -name "$1" -print;
+	find ${tree}include $ignore -name config -prune -o -name "$1" \
+		-not -type l -print;
 }
 
 # find sources in rest of tree
@@ -73,7 +78,7 @@
 {
 	find ${tree}* $ignore \
 	     \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \
-	       -name "$1" -print;
+	       -name "$1" -not -type l -print;
 }
 
 find_sources()
@@ -187,6 +192,10 @@
 	--regex-c++='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \
 	--regex-c++='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \
 	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'		\
+	--regex-c++='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/'		\
+	--regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'		\
+	--regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
+	--regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
 	--regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
 	--regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
 	--regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/'	\
@@ -201,7 +210,8 @@
 	--regex-c='/DECLARE_(TASKLET|WORK|DELAYED_WORK)\((\w*)/\2/v/'	\
 	--regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/'		\
 	--regex-c='/(^\s)OFFSET\((\w*)/\2/v/'				\
-	--regex-c='/(^\s)DEFINE\((\w*)/\2/v/'
+	--regex-c='/(^\s)DEFINE\((\w*)/\2/v/'				\
+	--regex-c='/DEFINE_HASHTABLE\((\w*)/\1/v/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--langdef=kconfig --language-force=kconfig              \
@@ -244,9 +254,14 @@
 	--regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \
 	--regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \
 	--regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \
+	--regex='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/'	\
+	--regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'	\
+	--regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
+	--regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
 	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'		\
 	--regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
-	--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'
+	--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\
+	--regex='/DEFINE_HASHTABLE\((\w*)/\1/v/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/'
@@ -266,7 +281,7 @@
 		emacs $1
 	else
 		all_target_sources | xargs $1 -a
-        fi
+	fi
 }
 
 # Support um (which uses SUBARCH)
diff --git a/security/integrity/evm/Kconfig b/security/integrity/evm/Kconfig
index d35b491..d606f3d 100644
--- a/security/integrity/evm/Kconfig
+++ b/security/integrity/evm/Kconfig
@@ -12,15 +12,41 @@
 
 	  If you are unsure how to answer this question, answer N.
 
-config EVM_HMAC_VERSION
-	int "EVM HMAC version"
-	depends on EVM
-	default 2
-	help
-	  This options adds EVM HMAC version support.
-	  1 - original version
-	  2 - add per filesystem unique identifier (UUID) (default)
+if EVM
 
-	  WARNING: changing the HMAC calculation method or adding 
+menu "EVM options"
+
+config EVM_ATTR_FSUUID
+	bool "FSUUID (version 2)"
+	default y
+	depends on EVM
+	help
+	  Include filesystem UUID for HMAC calculation.
+
+	  The default value is 'selected', which is the former version 2.
+	  If 'not selected', it is the former version 1.
+
+	  WARNING: changing the HMAC calculation method or adding
 	  additional info to the calculation, requires existing EVM
-	  labeled file systems to be relabeled.  
+	  labeled file systems to be relabeled.
+
+config EVM_EXTRA_SMACK_XATTRS
+	bool "Additional SMACK xattrs"
+	depends on EVM && SECURITY_SMACK
+	default n
+	help
+	  Include additional SMACK xattrs for HMAC calculation.
+
+	  In addition to the original security xattrs (e.g. security.selinux,
+	  security.SMACK64, security.capability, and security.ima) included
+	  in the HMAC calculation, enabling this option includes newly defined
+	  Smack xattrs: security.SMACK64EXEC, security.SMACK64TRANSMUTE and
+	  security.SMACK64MMAP.
+
+	  WARNING: changing the HMAC calculation method or adding
+	  additional info to the calculation, requires existing EVM
+	  labeled file systems to be relabeled.
+
+endmenu
+
+endif
diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h
index 37c88dd..88bfe77 100644
--- a/security/integrity/evm/evm.h
+++ b/security/integrity/evm/evm.h
@@ -24,7 +24,10 @@
 extern int evm_initialized;
 extern char *evm_hmac;
 extern char *evm_hash;
-extern int evm_hmac_version;
+
+#define EVM_ATTR_FSUUID		0x0001
+
+extern int evm_hmac_attrs;
 
 extern struct crypto_shash *hmac_tfm;
 extern struct crypto_shash *hash_tfm;
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 6b540f1..5e9687f 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -112,7 +112,7 @@
 	hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
-	if (evm_hmac_version > 1)
+	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
 		crypto_shash_update(desc, inode->i_sb->s_uuid,
 				    sizeof(inode->i_sb->s_uuid));
 	crypto_shash_final(desc, digest);
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 6e0bd93..3bcb80d 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -32,7 +32,7 @@
 };
 char *evm_hmac = "hmac(sha1)";
 char *evm_hash = "sha1";
-int evm_hmac_version = CONFIG_EVM_HMAC_VERSION;
+int evm_hmac_attrs;
 
 char *evm_config_xattrnames[] = {
 #ifdef CONFIG_SECURITY_SELINUX
@@ -40,6 +40,11 @@
 #endif
 #ifdef CONFIG_SECURITY_SMACK
 	XATTR_NAME_SMACK,
+#ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS
+	XATTR_NAME_SMACKEXEC,
+	XATTR_NAME_SMACKTRANSMUTE,
+	XATTR_NAME_SMACKMMAP,
+#endif
 #endif
 #ifdef CONFIG_IMA_APPRAISE
 	XATTR_NAME_IMA,
@@ -57,6 +62,14 @@
 }
 __setup("evm=", evm_set_fixmode);
 
+static void __init evm_init_config(void)
+{
+#ifdef CONFIG_EVM_ATTR_FSUUID
+	evm_hmac_attrs |= EVM_ATTR_FSUUID;
+#endif
+	pr_info("HMAC attrs: 0x%x\n", evm_hmac_attrs);
+}
+
 static int evm_find_protected_xattrs(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
@@ -287,12 +300,20 @@
  * @xattr_value: pointer to the new extended attribute value
  * @xattr_value_len: pointer to the new extended attribute value length
  *
- * Updating 'security.evm' requires CAP_SYS_ADMIN privileges and that
- * the current value is valid.
+ * Before allowing the 'security.evm' protected xattr to be updated,
+ * verify the existing value is valid.  As only the kernel should have
+ * access to the EVM encrypted key needed to calculate the HMAC, prevent
+ * userspace from writing the HMAC value.  Writing 'security.evm'
+ * requires CAP_SYS_ADMIN privileges.
  */
 int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
+	const struct evm_ima_xattr_data *xattr_data = xattr_value;
+
+	if ((strcmp(xattr_name, XATTR_NAME_EVM) == 0)
+	    && (xattr_data->type == EVM_XATTR_HMAC))
+		return -EPERM;
 	return evm_protect_xattr(dentry, xattr_name, xattr_value,
 				 xattr_value_len);
 }
@@ -432,6 +453,8 @@
 {
 	int error;
 
+	evm_init_config();
+
 	error = evm_init_secfs();
 	if (error < 0) {
 		pr_info("Error registering secfs\n");
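
Taken together, the EVM pieces above replace a single version number with a bitmask of HMAC attributes. A minimal userspace sketch of the scheme (EVM_ATTR_FSUUID, evm_hmac_attrs and evm_init_config() come from the patch; the CONFIG define stands in for Kconfig, and the second flag is purely hypothetical, to show how the mask extends where 'evm_hmac_version > 1' could not):

	#include <stdio.h>

	#define CONFIG_EVM_ATTR_FSUUID	/* stands in for CONFIG_EVM_ATTR_FSUUID=y */

	#define EVM_ATTR_FSUUID		0x0001	/* from the patch */
	#define EVM_ATTR_HYPOTHETICAL	0x0002	/* illustrative only */

	static int evm_hmac_attrs;

	static void evm_init_config(void)
	{
	#ifdef CONFIG_EVM_ATTR_FSUUID
		evm_hmac_attrs |= EVM_ATTR_FSUUID;
	#endif
		printf("HMAC attrs: 0x%x\n", evm_hmac_attrs);
	}

	int main(void)
	{
		evm_init_config();
		/* the evm_crypto.c test, formerly 'evm_hmac_version > 1' */
		if (evm_hmac_attrs & EVM_ATTR_FSUUID)
			printf("fs UUID mixed into the HMAC\n");
		return 0;
	}
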
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 291bf0f..d3113d4 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -341,7 +341,7 @@
 	return 0;
 }
 
-static void ima_reset_appraise_flags(struct inode *inode)
+static void ima_reset_appraise_flags(struct inode *inode, int digsig)
 {
 	struct integrity_iint_cache *iint;
 
@@ -353,18 +353,22 @@
 		return;
 
 	iint->flags &= ~IMA_DONE_MASK;
+	if (digsig)
+		iint->flags |= IMA_DIGSIG;
 	return;
 }
 
 int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
+	const struct evm_ima_xattr_data *xvalue = xattr_value;
 	int result;
 
 	result = ima_protect_xattr(dentry, xattr_name, xattr_value,
 				   xattr_value_len);
 	if (result == 1) {
-		ima_reset_appraise_flags(dentry->d_inode);
+		ima_reset_appraise_flags(dentry->d_inode,
+			 (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
 		result = 0;
 	}
 	return result;
@@ -376,7 +380,7 @@
 
 	result = ima_protect_xattr(dentry, xattr_name, NULL, 0);
 	if (result == 1) {
-		ima_reset_appraise_flags(dentry->d_inode);
+		ima_reset_appraise_flags(dentry->d_inode, 0);
 		result = 0;
 	}
 	return result;
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 1bde8e6..ccd0ac8 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -27,6 +27,36 @@
 
 static struct crypto_shash *ima_shash_tfm;
 
+/**
+ * ima_kernel_read - read file content
+ *
+ * This is a function for reading file content instead of kernel_read().
+ * It does not perform locking checks to ensure it cannot be blocked.
+ * It does not perform security checks, because they are irrelevant for IMA.
+ *
+ */
+static int ima_kernel_read(struct file *file, loff_t offset,
+			   char *addr, unsigned long count)
+{
+	mm_segment_t old_fs;
+	char __user *buf = addr;
+	ssize_t ret;
+
+	if (!(file->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!file->f_op->read && !file->f_op->aio_read)
+		return -EINVAL;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	if (file->f_op->read)
+		ret = file->f_op->read(file, buf, count, &offset);
+	else
+		ret = do_sync_read(file, buf, count, &offset);
+	set_fs(old_fs);
+	return ret;
+}
+
 int ima_init_crypto(void)
 {
 	long rc;
@@ -104,7 +134,7 @@
 	while (offset < i_size) {
 		int rbuf_len;
 
-		rbuf_len = kernel_read(file, offset, rbuf, PAGE_SIZE);
+		rbuf_len = ima_kernel_read(file, offset, rbuf, PAGE_SIZE);
 		if (rbuf_len < 0) {
 			rc = rbuf_len;
 			break;
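
One detail worth noting in ima_kernel_read() above: the set_fs(get_ds()) / set_fs(old_fs) bracket temporarily widens the address-limit check so that ->read() and do_sync_read(), which expect a __user buffer, will accept the kernel pointer passed in; the original limit is restored before returning.
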
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dcc98cf..09baa33 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -81,7 +81,6 @@
 {
 	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
-	int must_measure;
 	bool send_tomtou = false, send_writers = false;
 	char *pathbuf = NULL;
 	const char *pathname;
@@ -92,18 +91,19 @@
 	mutex_lock(&inode->i_mutex);	/* file metadata: permissions, xattr */
 
 	if (mode & FMODE_WRITE) {
-		if (atomic_read(&inode->i_readcount) && IS_IMA(inode))
-			send_tomtou = true;
-		goto out;
+		if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) {
+			struct integrity_iint_cache *iint;
+			iint = integrity_iint_find(inode);
+			/* IMA_MEASURE is set from reader side */
+			if (iint && (iint->flags & IMA_MEASURE))
+				send_tomtou = true;
+		}
+	} else {
+		if ((atomic_read(&inode->i_writecount) > 0) &&
+		    ima_must_measure(inode, MAY_READ, FILE_CHECK))
+			send_writers = true;
 	}
 
-	must_measure = ima_must_measure(inode, MAY_READ, FILE_CHECK);
-	if (!must_measure)
-		goto out;
-
-	if (atomic_read(&inode->i_writecount) > 0)
-		send_writers = true;
-out:
 	mutex_unlock(&inode->i_mutex);
 
 	if (!send_tomtou && !send_writers)
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 9ca5e64..225c7315 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -660,7 +660,7 @@
 				  int atomic, int hop)
 {
 	struct snd_seq_subscribers *subs;
-	int err = 0, num_ev = 0;
+	int err, result = 0, num_ev = 0;
 	struct snd_seq_event event_saved;
 	struct snd_seq_client_port *src_port;
 	struct snd_seq_port_subs_info *grp;
@@ -685,8 +685,12 @@
 						  subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL);
 		err = snd_seq_deliver_single_event(client, event,
 						   0, atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev++;
 		/* restore original event record */
 		*event = event_saved;
@@ -697,7 +701,7 @@
 		up_read(&grp->list_mutex);
 	*event = event_saved; /* restore */
 	snd_seq_port_unlock(src_port);
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 
@@ -709,7 +713,7 @@
 				struct snd_seq_event *event,
 				int atomic, int hop)
 {
-	int num_ev = 0, err = 0;
+	int num_ev = 0, err, result = 0;
 	struct snd_seq_client *dest_client;
 	struct snd_seq_client_port *port;
 
@@ -724,14 +728,18 @@
 		err = snd_seq_deliver_single_event(NULL, event,
 						   SNDRV_SEQ_FILTER_BROADCAST,
 						   atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev++;
 	}
 	read_unlock(&dest_client->ports_lock);
 	snd_seq_client_unlock(dest_client);
 	event->dest.port = SNDRV_SEQ_ADDRESS_BROADCAST; /* restore */
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 /*
@@ -741,7 +749,7 @@
 static int broadcast_event(struct snd_seq_client *client,
 			   struct snd_seq_event *event, int atomic, int hop)
 {
-	int err = 0, num_ev = 0;
+	int err, result = 0, num_ev = 0;
 	int dest;
 	struct snd_seq_addr addr;
 
@@ -760,12 +768,16 @@
 			err = snd_seq_deliver_single_event(NULL, event,
 							   SNDRV_SEQ_FILTER_BROADCAST,
 							   atomic, hop);
-		if (err < 0)
-			break;
+		if (err < 0) {
+			/* save first error that occurs and continue */
+			if (!result)
+				result = err;
+			continue;
+		}
 		num_ev += err;
 	}
 	event->dest = addr; /* restore */
-	return (err < 0) ? err : num_ev;
+	return (result < 0) ? result : num_ev;
 }
 
 
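
All three delivery loops above apply the same pattern: remember the first error, keep delivering to the remaining destinations, and report either that error or the number of successful deliveries. A standalone sketch, with deliver_one() as a hypothetical stand-in for snd_seq_deliver_single_event():

	#include <errno.h>
	#include <stdio.h>

	/* hypothetical per-destination delivery; negative errno on failure */
	static int deliver_one(int dest)
	{
		return (dest == 2) ? -EIO : 0;	/* pretend one destination fails */
	}

	static int deliver_to_all(int ndests)
	{
		int err, result = 0, num_ev = 0;
		int i;

		for (i = 0; i < ndests; i++) {
			err = deliver_one(i);
			if (err < 0) {
				/* save first error that occurs and continue */
				if (!result)
					result = err;
				continue;
			}
			num_ev++;
		}
		/* the first error wins; otherwise report how many succeeded */
		return (result < 0) ? result : num_ev;
	}

	int main(void)
	{
		/* destinations 0, 1 and 3 succeed, 2 fails: prints the first error */
		printf("%d\n", deliver_to_all(4));
		return 0;
	}
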
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 5599899..53a403e 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -124,7 +124,7 @@
 	snd_use_lock_use(&f->use_lock);
 	err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL); /* always non-blocking */
 	if (err < 0) {
-		if (err == -ENOMEM)
+		if ((err == -ENOMEM) || (err == -EAGAIN))
 			atomic_inc(&f->overflow);
 		snd_use_lock_free(&f->use_lock);
 		return err;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index cfd455a..777a45e 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -390,7 +390,7 @@
 	struct timespec tstamp;
 
 	if (timer_tstamp_monotonic)
-		do_posix_clock_monotonic_gettime(&tstamp);
+		ktime_get_ts(&tstamp);
 	else
 		getnstimeofday(&tstamp);
 	if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START ||
@@ -1203,7 +1203,7 @@
 	}
 	if (tu->last_resolution != resolution || ticks > 0) {
 		if (timer_tstamp_monotonic)
-			do_posix_clock_monotonic_gettime(&tstamp);
+			ktime_get_ts(&tstamp);
 		else
 			getnstimeofday(&tstamp);
 	}
diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h
index d1c93a1..e13eef9 100644
--- a/sound/firewire/bebob/bebob.h
+++ b/sound/firewire/bebob/bebob.h
@@ -208,8 +208,6 @@
 int snd_bebob_stream_check_internal_clock(struct snd_bebob *bebob,
 					  bool *internal);
 int snd_bebob_stream_discover(struct snd_bebob *bebob);
-int snd_bebob_stream_map(struct snd_bebob *bebob,
-			 struct amdtp_stream *stream);
 int snd_bebob_stream_init_duplex(struct snd_bebob *bebob);
 int snd_bebob_stream_start_duplex(struct snd_bebob *bebob, unsigned int rate);
 void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob);
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index bc4f827..ef4d0c9 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -655,8 +655,6 @@
 	struct amdtp_stream *master, *slave;
 	atomic_t *master_substreams, *slave_substreams;
 
-	mutex_lock(&bebob->mutex);
-
 	if (bebob->master == &bebob->rx_stream) {
 		slave  = &bebob->tx_stream;
 		master = &bebob->rx_stream;
@@ -669,6 +667,8 @@
 		master_substreams = &bebob->capture_substreams;
 	}
 
+	mutex_lock(&bebob->mutex);
+
 	if (atomic_read(slave_substreams) == 0) {
 		amdtp_stream_pcm_abort(slave);
 		amdtp_stream_stop(slave);
diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c
index 996fdc4..3e2ed8e 100644
--- a/sound/firewire/fireworks/fireworks.c
+++ b/sound/firewire/fireworks/fireworks.c
@@ -346,7 +346,6 @@
 {
 	snd_efw_transaction_unregister();
 	driver_unregister(&efw_driver.driver);
-	mutex_destroy(&devices_mutex);
 }
 
 module_init(snd_efw_init);
diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h
index d2b36be..4f0201a 100644
--- a/sound/firewire/fireworks/fireworks.h
+++ b/sound/firewire/fireworks/fireworks.h
@@ -162,7 +162,6 @@
 	SND_EFW_CH_TYPE_GUITAR			= 7,
 	SND_EFW_CH_TYPE_PIEZO_GUITAR		= 8,
 	SND_EFW_CH_TYPE_GUITAR_STRING		= 9,
-	SND_EFW_CH_TYPE_VIRTUAL			= 0x10000,
 	SND_EFW_CH_TYPE_DUMMY
 };
 struct snd_efw_phys_meters {
diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
index 4f8216f..33df865 100644
--- a/sound/firewire/fireworks/fireworks_hwdep.c
+++ b/sound/firewire/fireworks/fireworks_hwdep.c
@@ -58,7 +58,7 @@
 			efw->pull_ptr += till_end;
 			if (efw->pull_ptr >= efw->resp_buf +
 					     snd_efw_resp_buf_size)
-				efw->pull_ptr = efw->resp_buf;
+				efw->pull_ptr -= snd_efw_resp_buf_size;
 
 			length -= till_end;
 			buf += till_end;
diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c
index 5415690..b985fc5 100644
--- a/sound/firewire/fireworks/fireworks_stream.c
+++ b/sound/firewire/fireworks/fireworks_stream.c
@@ -284,8 +284,6 @@
 	struct amdtp_stream *master, *slave;
 	atomic_t *master_substreams, *slave_substreams;
 
-	mutex_lock(&efw->mutex);
-
 	if (efw->master == &efw->rx_stream) {
 		slave  = &efw->tx_stream;
 		master = &efw->rx_stream;
@@ -298,6 +296,8 @@
 		master_substreams = &efw->capture_substreams;
 	}
 
+	mutex_lock(&efw->mutex);
+
 	if (atomic_read(slave_substreams) == 0) {
 		stop_stream(efw, slave);
 
diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c
index aa56b8a..255dabc 100644
--- a/sound/firewire/fireworks/fireworks_transaction.c
+++ b/sound/firewire/fireworks/fireworks_transaction.c
@@ -8,19 +8,19 @@
 
 /*
  * Fireworks have its own transaction. The transaction can be delivered by AV/C
- * Vendor Specific command. But at least Windows driver and firmware version 5.5
- * or later don't use it.
+ * Vendor Specific command frame or by a usual asynchronous transaction. At
+ * least, the Windows driver and firmware version 5.5 or later don't use the
+ * AV/C command.
  *
  * Transaction substance:
- *  At first, 6 data exist. Following to the 6 data, parameters for each
- *  commands exists. All of parameters are 32 bit alighed to big endian.
+ *  At first, 6 data exist. Following the data, parameters for each command
+ *  exist. All of the parameters are 32 bit aligned, in big endian.
  *   data[0]:	Length of transaction substance
  *   data[1]:	Transaction version
  *   data[2]:	Sequence number. This is incremented by the device
- *   data[3]:	transaction category
- *   data[4]:	transaction command
- *   data[5]:	return value in response.
- *   data[6-]:	parameters
+ *   data[3]:	Transaction category
+ *   data[4]:	Transaction command
+ *   data[5]:	Return value in response.
+ *   data[6-]:	Parameters
  *
  * Transaction address:
  *  command:	0xecc000000000
@@ -148,7 +148,7 @@
 
 		efw->push_ptr += till_end;
 		if (efw->push_ptr >= efw->resp_buf + snd_efw_resp_buf_size)
-			efw->push_ptr = efw->resp_buf;
+			efw->push_ptr -= snd_efw_resp_buf_size;
 
 		length -= till_end;
 		data += till_end;
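
The reworked comment above describes a fixed six-quadlet preamble followed by parameters. The same layout as a C struct, purely for illustration (field names are invented here; every field is a 32-bit big-endian value per the comment):

	#include <stdint.h>

	struct efw_transaction_frame {
		uint32_t length;	/* data[0]: length of transaction substance */
		uint32_t version;	/* data[1]: transaction version */
		uint32_t seqnum;	/* data[2]: incremented by the device */
		uint32_t category;	/* data[3]: transaction category */
		uint32_t command;	/* data[4]: transaction command */
		uint32_t status;	/* data[5]: return value in response */
		uint32_t params[];	/* data[6-]: command parameters */
	};
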
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cd77b9b..bb65a124 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -237,6 +237,12 @@
 	 AZX_DCAPS_COUNT_LPIB_DELAY | AZX_DCAPS_PM_RUNTIME | \
 	 AZX_DCAPS_I915_POWERWELL)
 
+/* Broadwell HDMI can't use position buffer reliably, force to use LPIB */
+#define AZX_DCAPS_INTEL_BROADWELL \
+	(AZX_DCAPS_SCH_SNOOP | AZX_DCAPS_ALIGN_BUFSIZE | \
+	 AZX_DCAPS_POSFIX_LPIB | AZX_DCAPS_PM_RUNTIME | \
+	 AZX_DCAPS_I915_POWERWELL)
+
 /* quirks for ATI SB / AMD Hudson */
 #define AZX_DCAPS_PRESET_ATI_SB \
 	(AZX_DCAPS_ATI_SNOOP | AZX_DCAPS_NO_TCSEL | \
@@ -1367,12 +1373,6 @@
 	/* initialize streams */
 	azx_init_stream(chip);
 
-	/* workaround for Broadwell HDMI: the first stream is broken,
-	 * so mask it by keeping it as if opened
-	 */
-	if (pci->vendor == 0x8086 && pci->device == 0x160c)
-		chip->azx_dev[0].opened = 1;
-
 	/* initialize chip */
 	azx_init_pci(chip);
 	azx_init_chip(chip, (probe_only[dev] & 2) == 0);
@@ -1769,7 +1769,7 @@
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
 	/* Broadwell */
 	{ PCI_DEVICE(0x8086, 0x160c),
-	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
+	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_BROADWELL },
 	/* 5 Series/3400 */
 	{ PCI_DEVICE(0x8086, 0x3b56),
 	  .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index be0a9ee..3e4417b 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1594,10 +1594,18 @@
 		 * Re-setup pin and infoframe. This is needed e.g. when
 		 * - sink is first plugged-in (infoframe is not set up if !monitor_present)
 		 * - transcoder can change during stream playback on Haswell
+		 *   and this can make HW reset converter selection on a pin.
 		 */
-		if (eld->eld_valid && !old_eld_valid && per_pin->setup)
+		if (eld->eld_valid && !old_eld_valid && per_pin->setup) {
+			if (is_haswell_plus(codec) || is_valleyview(codec)) {
+				intel_verify_pin_cvt_connect(codec, per_pin);
+				intel_not_share_assigned_cvt(codec, pin_nid,
+							per_pin->mux_idx);
+			}
+
 			hdmi_setup_audio_infoframe(codec, per_pin,
 						   per_pin->non_pcm);
+		}
 	}
 
 	if (eld_changed)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 12fb411..af76995 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -929,6 +929,7 @@
 };
 
 static struct alc_codec_rename_table rename_tbl[] = {
+	{ 0x10ec0221, 0xf00f, 0x1003, "ALC231" },
 	{ 0x10ec0269, 0xfff0, 0x3010, "ALC277" },
 	{ 0x10ec0269, 0xf0f0, 0x2010, "ALC259" },
 	{ 0x10ec0269, 0xf0f0, 0x3010, "ALC258" },
@@ -937,6 +938,7 @@
 	{ 0x10ec0269, 0xffff, 0x6023, "ALC281X" },
 	{ 0x10ec0269, 0x00f0, 0x0020, "ALC269VC" },
 	{ 0x10ec0269, 0x00f0, 0x0030, "ALC269VD" },
+	{ 0x10ec0662, 0xffff, 0x4020, "ALC656" },
 	{ 0x10ec0887, 0x00f0, 0x0030, "ALC887-VD" },
 	{ 0x10ec0888, 0x00f0, 0x0030, "ALC888-VD" },
 	{ 0x10ec0888, 0xf0f0, 0x3020, "ALC886" },
@@ -956,6 +958,19 @@
 	{ 0x10ec0293, 0x1028, 0, "ALC3235" },
 	{ 0x10ec0255, 0x1028, 0, "ALC3234" },
 	{ 0x10ec0668, 0x1028, 0, "ALC3661" },
+	{ 0x10ec0275, 0x1028, 0, "ALC3260" },
+	{ 0x10ec0899, 0x1028, 0, "ALC3861" },
+	{ 0x10ec0670, 0x1025, 0, "ALC669X" },
+	{ 0x10ec0676, 0x1025, 0, "ALC679X" },
+	{ 0x10ec0282, 0x1043, 0, "ALC3229" },
+	{ 0x10ec0233, 0x1043, 0, "ALC3236" },
+	{ 0x10ec0280, 0x103c, 0, "ALC3228" },
+	{ 0x10ec0282, 0x103c, 0, "ALC3227" },
+	{ 0x10ec0286, 0x103c, 0, "ALC3242" },
+	{ 0x10ec0290, 0x103c, 0, "ALC3241" },
+	{ 0x10ec0668, 0x103c, 0, "ALC3662" },
+	{ 0x10ec0283, 0x17aa, 0, "ALC3239" },
+	{ 0x10ec0292, 0x17aa, 0, "ALC3232" },
 	{ } /* terminator */
 };
 
@@ -1412,6 +1427,7 @@
 	SND_PCI_QUIRK(0x1043, 0x10c3, "ASUS W5A", ALC880_FIXUP_ASUS_W5A),
 	SND_PCI_QUIRK(0x1043, 0x1964, "ASUS Z71V", ALC880_FIXUP_Z71V),
 	SND_PCI_QUIRK_VENDOR(0x1043, "ASUS", ALC880_FIXUP_GPIO1),
+	SND_PCI_QUIRK(0x147b, 0x1045, "ABit AA8XE", ALC880_FIXUP_6ST_AUTOMUTE),
 	SND_PCI_QUIRK(0x1558, 0x5401, "Clevo GPIO2", ALC880_FIXUP_GPIO2),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", ALC880_FIXUP_EAPD_COEF),
 	SND_PCI_QUIRK(0x1584, 0x9050, "Uniwill", ALC880_FIXUP_UNIWILL_DIG),
@@ -4230,6 +4246,7 @@
 	ALC269_FIXUP_HEADSET_MIC,
 	ALC269_FIXUP_QUANTA_MUTE,
 	ALC269_FIXUP_LIFEBOOK,
+	ALC269_FIXUP_LIFEBOOK_EXTMIC,
 	ALC269_FIXUP_AMIC,
 	ALC269_FIXUP_DMIC,
 	ALC269VB_FIXUP_AMIC,
@@ -4367,6 +4384,13 @@
 		.chained = true,
 		.chain_id = ALC269_FIXUP_QUANTA_MUTE
 	},
+	[ALC269_FIXUP_LIFEBOOK_EXTMIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a1903c }, /* headset mic, with jack detect */
+			{ }
+		},
+	},
 	[ALC269_FIXUP_AMIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -4741,18 +4765,12 @@
 	SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
-	SND_PCI_QUIRK(0x1028, 0x062c, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x063f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x064d, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0668, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0669, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x0674, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x067e, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x067f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1028, 0x0680, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0684, "Dell", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
@@ -4764,14 +4782,24 @@
 	SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x218b, "HP", ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED),
 	/* ALC282 */
+	SND_PCI_QUIRK(0x103c, 0x220d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x220e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x220f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2210, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2211, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2212, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2213, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2214, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2266, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2267, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2268, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2269, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x226a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x226b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x226f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x227a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x227b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x229e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -4811,6 +4839,10 @@
 	SND_PCI_QUIRK(0x103c, 0x22c8, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x22c3, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x22c4, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2334, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -4834,6 +4866,7 @@
 	SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
 	SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
 	SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+	SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -4977,6 +5010,26 @@
 #endif
 		.pins = (const struct hda_pintbl[]) {
 			{0x12, 0x90a60160},
+			{0x14, 0x90170120},
+			{0x17, 0x90170140},
+			{0x18, 0x40000000},
+			{0x19, 0x411111f0},
+			{0x1a, 0x411111f0},
+			{0x1b, 0x411111f0},
+			{0x1d, 0x41163b05},
+			{0x1e, 0x411111f0},
+			{0x21, 0x0321102f},
+		},
+		.value = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+	},
+	{
+		.codec = 0x10ec0255,
+		.subvendor = 0x1028,
+#ifdef CONFIG_SND_DEBUG_VERBOSE
+		.name = "Dell",
+#endif
+		.pins = (const struct hda_pintbl[]) {
+			{0x12, 0x90a60160},
 			{0x14, 0x90170130},
 			{0x17, 0x40000000},
 			{0x18, 0x411111f0},
@@ -5129,7 +5182,7 @@
 			{0x1d, 0x40700001},
 			{0x1e, 0x411111f0},
 		},
-		.value = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+		.value = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
 	},
 	{}
 };
@@ -6014,6 +6067,27 @@
 		.name = "Dell",
 #endif
 		.pins = (const struct hda_pintbl[]) {
+			{0x12, 0x99a30140},
+			{0x14, 0x90170110},
+			{0x15, 0x0321101f},
+			{0x16, 0x03011020},
+			{0x18, 0x40000008},
+			{0x19, 0x411111f0},
+			{0x1a, 0x411111f0},
+			{0x1b, 0x411111f0},
+			{0x1d, 0x41000001},
+			{0x1e, 0x411111f0},
+			{0x1f, 0x411111f0},
+		},
+		.value = ALC668_FIXUP_AUTO_MUTE,
+	},
+	{
+		.codec = 0x10ec0668,
+		.subvendor = 0x1028,
+#ifdef CONFIG_SND_DEBUG_VERBOSE
+		.name = "Dell",
+#endif
+		.pins = (const struct hda_pintbl[]) {
 			{0x12, 0x99a30150},
 			{0x14, 0x90170110},
 			{0x15, 0x0321101f},
@@ -6190,6 +6264,7 @@
 	{ .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 },
 	{ .id = 0x10ec0231, .name = "ALC231", .patch = patch_alc269 },
 	{ .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 },
+	{ .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 },
 	{ .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 },
 	{ .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 },
@@ -6223,10 +6298,12 @@
 	  .patch = patch_alc662 },
 	{ .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
 	{ .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
+	{ .id = 0x10ec0667, .name = "ALC667", .patch = patch_alc662 },
 	{ .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
 	{ .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
 	{ .id = 0x10ec0671, .name = "ALC671", .patch = patch_alc662 },
 	{ .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
+	{ .id = 0x10ec0867, .name = "ALC891", .patch = patch_alc882 },
 	{ .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
 	{ .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 },
 	{ .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc882 },
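
The quirk tables above key on raw HDA "pin default configuration" dwords (e.g.
0x01a1903c for the Lifebook headset mic, 0x411111f0 for unused pins).  As a
reading aid, here is a small standalone decoder; the field layout follows the
Intel HDA specification (section 7.3.3.31), and this is an illustration, not
code from this series:

#include <stdio.h>
#include <stdint.h>

/* Illustrative decoder for an HDA pin default config dword. */
static void decode_pincfg(uint32_t cfg)
{
	printf("0x%08x: port=%u loc=0x%02x dev=0x%x conn=0x%x "
	       "color=0x%x misc=0x%x assoc/seq=0x%02x\n",
	       cfg,
	       (cfg >> 30) & 0x3,	/* 1 == no physical connection */
	       (cfg >> 24) & 0x3f,
	       (cfg >> 20) & 0xf,	/* 0xa == Mic In */
	       (cfg >> 16) & 0xf,
	       (cfg >> 12) & 0xf,
	       (cfg >>  8) & 0xf,	/* bit 0 set == jack detect overridden */
	       cfg & 0xff);
}

int main(void)
{
	decode_pincfg(0x01a1903c);	/* the headset-mic pin added above */
	decode_pincfg(0x411111f0);	/* the conventional "pin not used" value */
	return 0;
}
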
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 68340d7..c91860e 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -2779,7 +2779,7 @@
 	unsigned long port;
 	unsigned long pos, pos1, t;
 	int civ, timeout = 1000, attempt = 1;
-	struct timespec start_time, stop_time;
+	ktime_t start_time, stop_time;
 
 	if (chip->ac97_bus->clock != 48000)
 		return; /* specified in module option */
@@ -2813,7 +2813,7 @@
 		iputbyte(chip, port + ICH_REG_OFF_CR, ICH_IOCE);
 		iputdword(chip, ICHREG(ALI_DMACR), 1 << ichdev->ali_slot);
 	}
-	do_posix_clock_monotonic_gettime(&start_time);
+	start_time = ktime_get();
 	spin_unlock_irq(&chip->reg_lock);
 	msleep(50);
 	spin_lock_irq(&chip->reg_lock);
@@ -2837,7 +2837,7 @@
 		pos += ichdev->position;
 	}
 	chip->in_measurement = 0;
-	do_posix_clock_monotonic_gettime(&stop_time);
+	stop_time = ktime_get();
 	/* stop */
 	if (chip->device_type == DEVICE_ALI) {
 		iputdword(chip, ICHREG(ALI_DMACR), 1 << (ichdev->ali_slot + 16));
@@ -2865,9 +2865,7 @@
 	}
 
 	pos /= 4;
-	t = stop_time.tv_sec - start_time.tv_sec;
-	t *= 1000000;
-	t += (stop_time.tv_nsec - start_time.tv_nsec) / 1000;
+	t = ktime_us_delta(stop_time, start_time);
 	dev_info(chip->card->dev,
 		 "%s: measured %lu usecs (%lu samples)\n", __func__, t, pos);
 	if (t == 0) {
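
The intel8x0 change swaps the open-coded timespec math for ktime_get() and
ktime_us_delta(), which do the subtraction and microsecond scaling in one
call.  Since ktime_t is kernel-only, a minimal userspace analogue of the
measurement pattern (a sketch, assuming CLOCK_MONOTONIC as the stand-in
clock):

#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	struct timespec start, stop;
	long long us;

	clock_gettime(CLOCK_MONOTONIC, &start);
	usleep(50 * 1000);		/* stands in for the driver's msleep(50) */
	clock_gettime(CLOCK_MONOTONIC, &stop);

	/* this difference is what ktime_us_delta() hands back in the driver */
	us = (stop.tv_sec - start.tv_sec) * 1000000LL +
	     (stop.tv_nsec - start.tv_nsec) / 1000;
	printf("measured %lld usecs\n", us);
	return 0;
}
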
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5b5eb78..c1b49c3 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -1,8 +1,10 @@
 /* TODO merge/factor in debugfs.c here */
 
+#include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/vfs.h>
 
@@ -96,12 +98,51 @@
 	return false;
 }
 
+static void mem_toupper(char *f, size_t len)
+{
+	while (len) {
+		*f = toupper(*f);
+		f++;
+		len--;
+	}
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/sysfs-rules.txt
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+	char *override_path;
+	size_t name_len = strlen(fs->name);
+	/* name + "_PATH" + '\0' */
+	char upper_name[name_len + 5 + 1];
+	memcpy(upper_name, fs->name, name_len);
+	mem_toupper(upper_name, name_len);
+	strcpy(&upper_name[name_len], "_PATH");
+
+	override_path = getenv(upper_name);
+	if (!override_path)
+		return false;
+
+	fs->found = true;
+	strncpy(fs->path, override_path, sizeof(fs->path));
+	return true;
+}
+
 static const char *fs__get_mountpoint(struct fs *fs)
 {
+	if (fs__env_override(fs))
+		return fs->path;
+
 	if (fs__check_mounts(fs))
 		return fs->path;
 
-	return fs__read_mounts(fs) ? fs->path : NULL;
+	if (fs__read_mounts(fs))
+		return fs->path;
+
+	return NULL;
 }
 
 static const char *fs__mountpoint(int idx)
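
The override maps an fs name to an environment variable by upper-casing it
and appending "_PATH" ("sysfs" becomes SYSFS_PATH), then trusts the
variable's value as the mountpoint.  A self-contained sketch of the same
lookup, for illustration only (note the (unsigned char) cast that the ctype
functions require):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sketch: map an fs name like "sysfs" to its override variable SYSFS_PATH. */
static const char *env_override(const char *name)
{
	char var[64];
	size_t i, len = strlen(name);

	if (len + sizeof("_PATH") > sizeof(var))
		return NULL;
	for (i = 0; i < len; i++)
		var[i] = toupper((unsigned char)name[i]);
	strcpy(&var[len], "_PATH");

	return getenv(var);	/* NULL when no override is set */
}

int main(void)
{
	const char *path = env_override("sysfs");
	printf("sysfs mountpoint override: %s\n", path ? path : "(none)");
	return 0;
}

With this in place, something like SYSFS_PATH=/tmp/fake-sysfs lets a test
point the library at a prepared directory tree instead of the real mounts.
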
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index c71b0f3..d460049 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -184,9 +184,10 @@
 	- in_tx: only when the target is in a hardware transaction
 	- no_tx: only when the target is not in a hardware transaction
 	- abort_tx: only when the target is a hardware transaction abort
+	- cond: conditional branches
 
 +
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
 The privilege levels may be omitted, in which case, the privilege levels of the associated
 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
 levels are subject to permissions.  When sampling on multiple events, branch stack sampling
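
As a hypothetical invocation of the new filter (the workload path is made
up):

	$ perf record -j cond,u -- ./my_workload
	$ perf report --branch-stack
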
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a1b5185..cefdf43 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -111,7 +111,7 @@
 --fields=::
 	Specify output field - multiple keys can be specified in CSV format.
 	Following fields are available:
-	overhead, overhead_sys, overhead_us, sample and period.
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
 	Also it can contain any sort key(s).
 
 	By default, every sort keys not specified in -F will be appended
@@ -163,6 +163,11 @@
 
 	Default: fractal,0.5,callee,function.
 
+--children::
+	Accumulate callchains of children into the parent entry so that they
+	can show up in the output.  The output will have a new "Children"
+	column and will be sorted on the data.  Requires recorded callchains.
+
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
 	beyond the specified depth will be ignored. This is a trade-off
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index dcfa54c..180ae02 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -119,7 +119,7 @@
 --fields=::
 	Specify output field - multiple keys can be specified in CSV format.
 	Following fields are available:
-	overhead, overhead_sys, overhead_us, sample and period.
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
 	Also it can contain any sort key(s).
 
 	By default, every sort keys not specified in --field will be appended
@@ -161,6 +161,12 @@
 	Setup and enable call-graph (stack chain/backtrace) recording,
 	implies -g.
 
+--children::
+	Accumulate callchains of children into the parent entry so that they
+	can show up in the output.  The output will have a new "Children"
+	column and will be sorted on the data.  It requires the -g/--call-graph
+	option to be enabled.
+
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
 	beyond the specified depth will be ignored. This is a trade-off
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 02f0a4d..ae20edf 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -400,6 +400,7 @@
 LIB_OBJS += $(OUTPUT)tests/hists_link.o
 LIB_OBJS += $(OUTPUT)tests/hists_filter.o
 LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
 LIB_OBJS += $(OUTPUT)tests/python-use.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
@@ -788,8 +789,8 @@
 	@echo ''
 	@echo 'Perf install targets:'
 	@echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
-	@echo '  HINT: use "make prefix=<path> <install target>" to install to a particular'
-	@echo '        path like make prefix=/usr/local install install-doc'
+	@echo '  HINT: use "prefix" or "DESTDIR" to install to a particular'
+	@echo '        path like "make prefix=/usr/local install install-doc"'
 	@echo '  install	- install compiled binaries'
 	@echo '  install-doc	- install *all* documentation'
 	@echo '  install-man	- install manpage documentation'
@@ -814,17 +815,20 @@
 $(DOC_TARGETS):
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
 
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
 TAGS:
 	$(RM) TAGS
-	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
 
 tags:
 	$(RM) tags
-	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
 
 cscope:
 	$(RM) cscope*
-	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
 
 ### Detect prefix changes
 TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index d30d2c2..1ec429f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -65,12 +65,13 @@
 		return 0;
 	}
 
-	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
+	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+				true);
 	if (he == NULL)
 		return -ENOMEM;
 
 	ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+	hists__inc_nr_samples(&evsel->hists, true);
 	return ret;
 }
 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8bff543..9a5a035 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -315,7 +315,7 @@
 			    u64 weight, u64 transaction)
 {
 	if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
-			       transaction) != NULL)
+			       transaction, true) != NULL)
 		return 0;
 	return -ENOMEM;
 }
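
Both converted call sites now pass a trailing bool to __hists__add_entry().
Read together with the cumulate changes further down, it plausibly selects
whether the sample is accounted to the entry's own (self) period rather than
only its cumulative one.  A hedged sketch of the assumed signature, with the
final parameter name guessed (the callers above simply pass true):

#include <stdbool.h>

typedef unsigned long long u64;
struct hists;
struct addr_location;
struct symbol;
struct branch_info;
struct mem_info;
struct hist_entry;

struct hist_entry *__hists__add_entry(struct hists *hists,
				      struct addr_location *al,
				      struct symbol *sym_parent,
				      struct branch_info *bi,
				      struct mem_info *mi,
				      u64 period, u64 weight, u64 transaction,
				      bool sample_self);
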
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e4c85b8..378b85b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -454,7 +454,11 @@
 			if (done)
 				break;
 			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
-			if (err < 0 && errno == EINTR)
+			/*
+			 * Propagate the error only if there is one.  A positive
+			 * number of returned events and an EINTR are not errors.
+			 */
+			if (err > 0 || (err < 0 && errno == EINTR))
 				err = 0;
 			waking++;
 		}
@@ -544,6 +548,7 @@
 	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
 	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
 	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 	BRANCH_END
 };
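
poll(2) returns the number of ready descriptors, 0 on timeout, or -1 with
errno set, so both a positive return and an EINTR have to be normalized away
before treating the result as an error.  A compact standalone illustration of
that convention (not the perf code itself):

#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
	int err = poll(&pfd, 1, 1000);

	/* same normalization as the record loop: ready fds and EINTR are fine */
	if (err > 0 || (err < 0 && errno == EINTR))
		err = 0;

	if (err < 0)
		perror("poll");
	return err ? 1 : 0;
}
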
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bc0eec1..21d830b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -72,6 +72,10 @@
 		rep->min_percent = strtof(value, NULL);
 		return 0;
 	}
+	if (!strcmp(var, "report.children")) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+		return 0;
+	}
 
 	return perf_default_config(var, value, cb);
 }
@@ -85,156 +89,52 @@
 	 */
 	if (he->stat.nr_events == 1)
 		rep->nr_entries++;
-
-	/*
-	 * Only counts number of samples at this stage as it's more
-	 * natural to do it here and non-sample events are also
-	 * counted in perf_session_deliver_event().  The dump_trace
-	 * requires this info is ready before going to the output tree.
-	 */
-	hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-	if (!he->filtered)
-		he->hists->stats.nr_non_filtered_samples++;
 }
 
-static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
-				      struct perf_sample *sample, struct perf_evsel *evsel)
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al, bool single,
+				      void *arg)
 {
-	struct symbol *parent = NULL;
-	struct hist_entry *he;
-	struct mem_info *mi, *mx;
-	uint64_t cost;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
+	int err = 0;
+	struct report *rep = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+	struct mem_info *mi;
+	struct branch_info *bi;
 
-	if (err)
-		return err;
+	report__inc_stats(rep, he);
 
-	mi = sample__resolve_mem(sample, al);
-	if (!mi)
-		return -ENOMEM;
-
-	if (rep->hide_unresolved && !al->sym)
+	if (!ui__has_annotation())
 		return 0;
 
-	cost = sample->weight;
-	if (!cost)
-		cost = 1;
-
-	/*
-	 * must pass period=weight in order to get the correct
-	 * sorting from hists__collapse_resort() which is solely
-	 * based on periods. We want sorting be done on nr_events * weight
-	 * and this is indirectly achieved by passing period=weight here
-	 * and the he_stat__add_period() function.
-	 */
-	he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
-				cost, cost, 0);
-	if (!he)
-		return -ENOMEM;
-
-	if (ui__has_annotation()) {
-		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+	if (sort__mode == SORT_MODE__BRANCH) {
+		bi = he->branch_info;
+		err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
 		if (err)
 			goto out;
 
-		mx = he->mem_info;
-		err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
+
+	} else if (rep->mem_mode) {
+		mi = he->mem_info;
+		err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
 		if (err)
 			goto out;
-	}
 
-	report__inc_stats(rep, he);
-
-	err = hist_entry__append_callchain(he, sample);
-out:
-	return err;
-}
-
-static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
-					 struct perf_sample *sample, struct perf_evsel *evsel)
-{
-	struct symbol *parent = NULL;
-	unsigned i;
-	struct hist_entry *he;
-	struct branch_info *bi, *bx;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-	if (err)
-		return err;
-
-	bi = sample__resolve_bstack(sample, al);
-	if (!bi)
-		return -ENOMEM;
-
-	for (i = 0; i < sample->branch_stack->nr; i++) {
-		if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
-			continue;
-
-		err = -ENOMEM;
-
-		/* overwrite the 'al' to branch-to info */
-		al->map = bi[i].to.map;
-		al->sym = bi[i].to.sym;
-		al->addr = bi[i].to.addr;
-		/*
-		 * The report shows the percentage of total branches captured
-		 * and not events sampled. Thus we use a pseudo period of 1.
-		 */
-		he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
-					1, 1, 0);
-		if (he) {
-			if (ui__has_annotation()) {
-				bx = he->branch_info;
-				err = addr_map_symbol__inc_samples(&bx->from,
-								   evsel->idx);
-				if (err)
-					goto out;
-
-				err = addr_map_symbol__inc_samples(&bx->to,
-								   evsel->idx);
-				if (err)
-					goto out;
-			}
-			report__inc_stats(rep, he);
-		} else
-			goto out;
-	}
-	err = 0;
-out:
-	free(bi);
-	return err;
-}
-
-static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
-				  struct addr_location *al, struct perf_sample *sample)
-{
-	struct symbol *parent = NULL;
-	struct hist_entry *he;
-	int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
-	if (err)
-		return err;
-
-	he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
-				sample->period, sample->weight,
-				sample->transaction);
-	if (he == NULL)
-		return -ENOMEM;
-
-	err = hist_entry__append_callchain(he, sample);
-	if (err)
-		goto out;
-
-	if (ui__has_annotation())
 		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
 
-	report__inc_stats(rep, he);
+	} else if (symbol_conf.cumulate_callchain) {
+		if (single)
+			err = hist_entry__inc_addr_samples(he, evsel->idx,
+							   al->addr);
+	} else {
+		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+	}
 
 out:
 	return err;
 }
 
-
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
@@ -243,6 +143,10 @@
 {
 	struct report *rep = container_of(tool, struct report, tool);
 	struct addr_location al;
+	struct hist_entry_iter iter = {
+		.hide_unresolved = rep->hide_unresolved,
+		.add_entry_cb = hist_iter__report_callback,
+	};
 	int ret;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -257,22 +161,23 @@
 	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
 		return 0;
 
-	if (sort__mode == SORT_MODE__BRANCH) {
-		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
-		if (ret < 0)
-			pr_debug("problem adding lbr entry, skipping event\n");
-	} else if (rep->mem_mode == 1) {
-		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
-		if (ret < 0)
-			pr_debug("problem adding mem entry, skipping event\n");
-	} else {
-		if (al.map != NULL)
-			al.map->dso->hit = 1;
+	if (sort__mode == SORT_MODE__BRANCH)
+		iter.ops = &hist_iter_branch;
+	else if (rep->mem_mode)
+		iter.ops = &hist_iter_mem;
+	else if (symbol_conf.cumulate_callchain)
+		iter.ops = &hist_iter_cumulative;
+	else
+		iter.ops = &hist_iter_normal;
 
-		ret = report__add_hist_entry(rep, evsel, &al, sample);
-		if (ret < 0)
-			pr_debug("problem incrementing symbol period, skipping event\n");
-	}
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
+	ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
+				   rep);
+	if (ret < 0)
+		pr_debug("problem adding hist entry, skipping event\n");
+
 	return ret;
 }
 
@@ -329,6 +234,14 @@
 			}
 	}
 
+	if (symbol_conf.cumulate_callchain) {
+		/* Silently ignore if callchain is missing */
+		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+			symbol_conf.cumulate_callchain = false;
+			perf_hpp__cancel_cumulate();
+		}
+	}
+
 	if (sort__mode == SORT_MODE__BRANCH) {
 		if (!is_pipe &&
 		    !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -712,6 +625,8 @@
 	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
 		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
 		     "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
 		    "anything beyond the specified depth will be ignored. "
@@ -804,8 +719,10 @@
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
 
-	if (branch_mode == -1 && has_br_stack)
+	if (branch_mode == -1 && has_br_stack) {
 		sort__mode = SORT_MODE__BRANCH;
+		symbol_conf.cumulate_callchain = false;
+	}
 
 	if (report.mem_mode) {
 		if (sort__mode == SORT_MODE__BRANCH) {
@@ -813,6 +730,7 @@
 			goto error;
 		}
 		sort__mode = SORT_MODE__MEMORY;
+		symbol_conf.cumulate_callchain = false;
 	}
 
 	if (setup_sorting() < 0) {
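
The three near-duplicate report__add_*_hist_entry() helpers collapse into a
single hist_entry_iter driven by an ops table plus a per-entry callback
(hist_iter__report_callback above).  The real iterator has more stages than
shown here; this sketch, with simplified names, only captures the dispatch
idea:

struct entry_iter;

struct entry_iter_ops {
	int (*prepare)(struct entry_iter *iter);
	int (*add_entry)(struct entry_iter *iter);
	int (*finish)(struct entry_iter *iter);
};

struct entry_iter {
	const struct entry_iter_ops *ops;
	/* per-entry hook, e.g. the report/top callbacks in this series */
	int (*add_entry_cb)(struct entry_iter *iter, void *arg);
	void *priv;
};

static int entry_iter_run(struct entry_iter *iter, void *cb_arg)
{
	int err = iter->ops->prepare(iter);

	if (!err)
		err = iter->ops->add_entry(iter);
	if (!err && iter->add_entry_cb)
		err = iter->add_entry_cb(iter, cb_arg);
	if (!err)
		err = iter->ops->finish(iter);
	return err;
}

Selecting behaviour (normal, cumulative, branch, mem) then reduces to picking
an ops pointer, as the process_sample_event() rewrite above does.
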
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d717683..c38d06c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1428,7 +1428,7 @@
 	int err = 0;
 
 	evsel->hists.stats.total_period += sample->period;
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+	hists__inc_nr_samples(&evsel->hists, true);
 
 	if (evsel->handler != NULL) {
 		tracepoint_handler f = evsel->handler;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5b389ce..377971d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -196,6 +196,12 @@
 
 	pthread_mutex_unlock(&notes->lock);
 
+	/*
+	 * This function is now called with he->hists->lock held.
+	 * Release it before going to sleep.
+	 */
+	pthread_mutex_unlock(&he->hists->lock);
+
 	if (err == -ERANGE && !he->ms.map->erange_warned)
 		ui__warn_map_erange(he->ms.map, sym, ip);
 	else if (err == -ENOMEM) {
@@ -203,6 +209,8 @@
 		       sym->name);
 		sleep(1);
 	}
+
+	pthread_mutex_lock(&he->hists->lock);
 }
 
 static void perf_top__show_details(struct perf_top *top)
@@ -238,27 +246,6 @@
 	pthread_mutex_unlock(&notes->lock);
 }
 
-static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
-						     struct addr_location *al,
-						     struct perf_sample *sample)
-{
-	struct hist_entry *he;
-
-	pthread_mutex_lock(&evsel->hists.lock);
-	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
-				sample->period, sample->weight,
-				sample->transaction);
-	pthread_mutex_unlock(&evsel->hists.lock);
-	if (he == NULL)
-		return NULL;
-
-	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
-	if (!he->filtered)
-		evsel->hists.stats.nr_non_filtered_samples++;
-
-	return he;
-}
-
 static void perf_top__print_sym_table(struct perf_top *top)
 {
 	char bf[160];
@@ -662,6 +649,26 @@
 	return 0;
 }
 
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+				   struct addr_location *al, bool single,
+				   void *arg)
+{
+	struct perf_top *top = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+
+	if (sort__has_sym && single) {
+		u64 ip = al->addr;
+
+		if (al->map)
+			ip = al->map->unmap_ip(al->map, ip);
+
+		perf_top__record_precise_ip(top, he, evsel->idx, ip);
+	}
+
+	return 0;
+}
+
 static void perf_event__process_sample(struct perf_tool *tool,
 				       const union perf_event *event,
 				       struct perf_evsel *evsel,
@@ -669,8 +676,6 @@
 				       struct machine *machine)
 {
 	struct perf_top *top = container_of(tool, struct perf_top, tool);
-	struct symbol *parent = NULL;
-	u64 ip = sample->ip;
 	struct addr_location al;
 	int err;
 
@@ -745,25 +750,23 @@
 	}
 
 	if (al.sym == NULL || !al.sym->ignore) {
-		struct hist_entry *he;
+		struct hist_entry_iter iter = {
+			.add_entry_cb = hist_iter__top_callback,
+		};
 
-		err = sample__resolve_callchain(sample, &parent, evsel, &al,
-						top->max_stack);
-		if (err)
-			return;
+		if (symbol_conf.cumulate_callchain)
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
 
-		he = perf_evsel__add_hist_entry(evsel, &al, sample);
-		if (he == NULL) {
+		pthread_mutex_lock(&evsel->hists.lock);
+
+		err = hist_entry_iter__add(&iter, &al, evsel, sample,
+					   top->max_stack, top);
+		if (err < 0)
 			pr_err("Problem incrementing symbol period, skipping event\n");
-			return;
-		}
 
-		err = hist_entry__append_callchain(he, sample);
-		if (err)
-			return;
-
-		if (sort__has_sym)
-			perf_top__record_precise_ip(top, he, evsel->idx, ip);
+		pthread_mutex_unlock(&evsel->hists.lock);
 	}
 
 	return;
@@ -1001,6 +1004,10 @@
 
 	if (!strcmp(var, "top.call-graph"))
 		return record_parse_callchain(value, &top->record_opts);
+	if (!strcmp(var, "top.children")) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+		return 0;
+	}
 
 	return perf_default_config(var, value, cb);
 }
@@ -1095,6 +1102,8 @@
 	OPT_CALLBACK(0, "call-graph", &top.record_opts,
 		     "mode[,dump_size]", record_callchain_help,
 		     &parse_callchain_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
 	OPT_INTEGER(0, "max-stack", &top.max_stack,
 		    "Set the maximum stack depth when parsing the callchain. "
 		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
@@ -1200,6 +1209,11 @@
 
 	top.sym_evsel = perf_evlist__first(top.evlist);
 
+	if (!symbol_conf.use_callchain) {
+		symbol_conf.cumulate_callchain = false;
+		perf_hpp__cancel_cumulate();
+	}
+
 	symbol_conf.priv_size = sizeof(struct annotation);
 
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
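
Since hist_entry_iter__add() now runs under evsel->hists.lock, the slow
ENOMEM path above has to drop the lock around its sleep(1) and retake it
afterwards.  The pattern in isolation (a sketch; anything guarded by the lock
may have changed while sleeping, so callers must not cache guarded state
across the call):

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with `lock` held; must not sleep while still holding it. */
static void warn_and_backoff(void)
{
	pthread_mutex_unlock(&lock);	/* drop before sleeping */
	sleep(1);			/* slow path: let the system recover */
	pthread_mutex_lock(&lock);	/* restore the caller's invariant */
}
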
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 729bbdf..4f100b5 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -447,6 +447,7 @@
   ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
+    msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -599,7 +600,7 @@
 
 # Make the path relative to DESTDIR, not to prefix
 ifndef DESTDIR
-prefix = $(HOME)
+prefix ?= $(HOME)
 endif
 bindir_relative = bin
 bindir = $(prefix)/$(bindir_relative)
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 431798a..78f7b92 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -481,14 +481,18 @@
 		fprintf(stderr, "cannot handle %s internally", cmd);
 		goto out;
 	}
-#ifdef HAVE_LIBAUDIT_SUPPORT
 	if (!prefixcmp(cmd, "trace")) {
+#ifdef HAVE_LIBAUDIT_SUPPORT
 		set_buildid_dir();
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
-	}
+#else
+		fprintf(stderr,
+			"trace command not available: missing audit-libs devel package at build time.\n");
+		goto out;
 #endif
+	}
 	/* Look for flags.. */
 	argv++;
 	argc--;
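
Moving the #ifdef inside the prefixcmp() branch means a perf built without
libaudit still recognizes "trace" and prints a useful diagnostic instead of
silently falling through to option parsing.  A sketch of that pattern,
recognize the command unconditionally and gate only the implementation:

#include <stdio.h>
#include <string.h>

static int run_trace(void)
{
#ifdef HAVE_LIBAUDIT_SUPPORT
	puts("would run the real trace command here");
	return 0;
#else
	fprintf(stderr, "trace is not built in (libaudit devel was missing)\n");
	return -1;
#endif
}

int main(int argc, char **argv)
{
	if (argc > 1 && !strcmp(argv[1], "trace"))
		return run_trace() ? 1 : 0;
	return 0;
}
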
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 831f52c..802e3cd 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -140,6 +140,10 @@
 		.func = test__hists_output,
 	},
 	{
+		.desc = "Test cumulation of child hist entries",
+		.func = test__hists_cumulate,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index e4e01aad..a62c091 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -12,9 +12,9 @@
 	u32 pid;
 	const char *comm;
 } fake_threads[] = {
-	{ 100, "perf" },
-	{ 200, "perf" },
-	{ 300, "bash" },
+	{ FAKE_PID_PERF1, "perf" },
+	{ FAKE_PID_PERF2, "perf" },
+	{ FAKE_PID_BASH,  "bash" },
 };
 
 static struct {
@@ -22,15 +22,15 @@
 	u64 start;
 	const char *filename;
 } fake_mmap_info[] = {
-	{ 100, 0x40000, "perf" },
-	{ 100, 0x50000, "libc" },
-	{ 100, 0xf0000, "[kernel]" },
-	{ 200, 0x40000, "perf" },
-	{ 200, 0x50000, "libc" },
-	{ 200, 0xf0000, "[kernel]" },
-	{ 300, 0x40000, "bash" },
-	{ 300, 0x50000, "libc" },
-	{ 300, 0xf0000, "[kernel]" },
+	{ FAKE_PID_PERF1, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF1, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_PERF2, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF2, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_BASH,  FAKE_MAP_BASH,   "bash" },
+	{ FAKE_PID_BASH,  FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_BASH,  FAKE_MAP_KERNEL, "[kernel]" },
 };
 
 struct fake_sym {
@@ -40,27 +40,27 @@
 };
 
 static struct fake_sym perf_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "run_command" },
-	{ 900, 100, "cmd_record" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
 };
 
 static struct fake_sym bash_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "xmalloc" },
-	{ 900, 100, "xfree" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
 };
 
 static struct fake_sym libc_syms[] = {
 	{ 700, 100, "malloc" },
 	{ 800, 100, "free" },
 	{ 900, 100, "realloc" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
 };
 
 static struct fake_sym kernel_syms[] = {
-	{ 700, 100, "schedule" },
-	{ 800, 100, "page_fault" },
-	{ 900, 100, "sys_perf_event_open" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
 };
 
 static struct {
@@ -102,7 +105,7 @@
 				.pid = fake_mmap_info[i].pid,
 				.tid = fake_mmap_info[i].pid,
 				.start = fake_mmap_info[i].start,
-				.len = 0x1000ULL,
+				.len = FAKE_MAP_LENGTH,
 				.pgoff = 0ULL,
 			},
 		};
@@ -193,10 +196,11 @@
 		he = rb_entry(node, struct hist_entry, rb_node);
 
 		if (!he->filtered) {
-			pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
+			pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
 				i, thread__comm_str(he->thread), he->thread->tid,
 				he->ms.map->dso->short_name,
-				he->ms.sym->name, he->stat.period);
+				he->ms.sym->name, he->stat.period,
+				he->stat_acc ? he->stat_acc->period : 0);
 		}
 
 		i++;
diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h
index 1415ae6..888254e 100644
--- a/tools/perf/tests/hists_common.h
+++ b/tools/perf/tests/hists_common.h
@@ -4,6 +4,34 @@
 struct machine;
 struct machines;
 
+#define FAKE_PID_PERF1  100
+#define FAKE_PID_PERF2  200
+#define FAKE_PID_BASH   300
+
+#define FAKE_MAP_PERF    0x400000
+#define FAKE_MAP_BASH    0x400000
+#define FAKE_MAP_LIBC    0x500000
+#define FAKE_MAP_KERNEL  0xf00000
+#define FAKE_MAP_LENGTH  0x100000
+
+#define FAKE_SYM_OFFSET1  700
+#define FAKE_SYM_OFFSET2  800
+#define FAKE_SYM_OFFSET3  900
+#define FAKE_SYM_LENGTH   100
+
+#define FAKE_IP_PERF_MAIN  FAKE_MAP_PERF + FAKE_SYM_OFFSET1
+#define FAKE_IP_PERF_RUN_COMMAND  FAKE_MAP_PERF + FAKE_SYM_OFFSET2
+#define FAKE_IP_PERF_CMD_RECORD  FAKE_MAP_PERF + FAKE_SYM_OFFSET3
+#define FAKE_IP_BASH_MAIN  FAKE_MAP_BASH + FAKE_SYM_OFFSET1
+#define FAKE_IP_BASH_XMALLOC  FAKE_MAP_BASH + FAKE_SYM_OFFSET2
+#define FAKE_IP_BASH_XFREE  FAKE_MAP_BASH + FAKE_SYM_OFFSET3
+#define FAKE_IP_LIBC_MALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
+#define FAKE_IP_LIBC_FREE  FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
+#define FAKE_IP_LIBC_REALLOC  FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
+#define FAKE_IP_KERNEL_SCHEDULE  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
+#define FAKE_IP_KERNEL_PAGE_FAULT  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN  FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
+
 /*
  * The setup_fake_machine() provides a test environment which consists
  * of 3 processes that have 3 mappings and in turn, have 3 symbols
@@ -13,7 +41,7 @@
  * .............  .............  ...................
  *    perf:  100           perf  main
  *    perf:  100           perf  run_command
- *    perf:  100           perf  comd_record
+ *    perf:  100           perf  cmd_record
  *    perf:  100           libc  malloc
  *    perf:  100           libc  free
  *    perf:  100           libc  realloc
@@ -22,7 +50,7 @@
  *    perf:  100       [kernel]  sys_perf_event_open
  *    perf:  200           perf  main
  *    perf:  200           perf  run_command
- *    perf:  200           perf  comd_record
+ *    perf:  200           perf  cmd_record
  *    perf:  200           libc  malloc
  *    perf:  200           libc  free
  *    perf:  200           libc  realloc
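
One nit for anyone reusing these macros: the FAKE_IP_* definitions expand to
an unparenthesized sum, which is harmless in the initializers below but can
misbind inside larger expressions.  A defensively parenthesized variant would
look like this (illustration only, not part of the patch):

#define FAKE_MAP_PERF     0x400000
#define FAKE_SYM_OFFSET1  700
/* parenthesized, so e.g. `FAKE_IP_PERF_MAIN * 2` groups as expected */
#define FAKE_IP_PERF_MAIN (FAKE_MAP_PERF + FAKE_SYM_OFFSET1)
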
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644
index 0000000..0ac240d
--- /dev/null
+++ b/tools/perf/tests/hists_cumulate.c
@@ -0,0 +1,726 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+	u32 pid;
+	u64 ip;
+	struct thread *thread;
+	struct map *map;
+	struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+	/* perf [kernel] schedule() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [perf]   cmd_record() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+	/* perf [libc]   malloc() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+	/* perf [libc]   free() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [kernel] page_fault() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+	/* bash [bash]   main() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
+	/* bash [bash]   xmalloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+	/* bash [kernel] page_fault() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be cast to struct ip_callchain, whose nr and ips[] entries
+ * are all 64 bits wide.
+ */
+static u64 fake_callchains[][10] = {
+	/*   schedule => run_command => main */
+	{ 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main  */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   cmd_record => run_command => main */
+	{ 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   malloc => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   free => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   page_fault => sys_perf_event_open => run_command => main */
+	{ 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+	     FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_BASH_MAIN, },
+	/*   xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+	{ 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+	     FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+	/*   page_fault => malloc => main */
+	{ 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+	struct addr_location al;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	struct perf_sample sample = { .period = 1000, };
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+		const union perf_event event = {
+			.header = {
+				.misc = PERF_RECORD_MISC_USER,
+			},
+		};
+		struct hist_entry_iter iter = {
+			.hide_unresolved = false,
+		};
+
+		if (symbol_conf.cumulate_callchain)
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
+
+		sample.pid = fake_samples[i].pid;
+		sample.tid = fake_samples[i].pid;
+		sample.ip = fake_samples[i].ip;
+		sample.callchain = (struct ip_callchain *)fake_callchains[i];
+
+		if (perf_event__preprocess_sample(&event, machine, &al,
+						  &sample) < 0)
+			goto out;
+
+		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+					 PERF_MAX_STACK_DEPTH, NULL) < 0)
+			goto out;
+
+		fake_samples[i].thread = al.thread;
+		fake_samples[i].map = al.map;
+		fake_samples[i].sym = al.sym;
+	}
+
+	return TEST_OK;
+
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");
+	return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+	struct hist_entry *he;
+	struct rb_root *root_in;
+	struct rb_root *root_out;
+	struct rb_node *node;
+
+	if (sort__need_collapse)
+		root_in = &hists->entries_collapsed;
+	else
+		root_in = hists->entries_in;
+
+	root_out = &hists->entries;
+
+	while (!RB_EMPTY_ROOT(root_out)) {
+		node = rb_first(root_out);
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		rb_erase(node, root_out);
+		rb_erase(&he->rb_node_in, root_in);
+		hist_entry__free(he);
+	}
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he)  (thread__comm_str(he->thread))
+#define DSO(he)   (he->ms.map->dso->short_name)
+#define SYM(he)   (he->ms.sym->name)
+#define CPU(he)   (he->cpu)
+#define PID(he)   (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl)  (cl->ms.map->dso->short_name)
+#define CSYM(cl)  (cl->ms.sym->name)
+
+struct result {
+	u64 children;
+	u64 self;
+	const char *comm;
+	const char *dso;
+	const char *sym;
+};
+
+struct callchain_result {
+	u64 nr;
+	struct {
+		const char *dso;
+		const char *sym;
+	} node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+		   struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+	char buf[32];
+	size_t i, c;
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+	struct callchain_node *cnode;
+	struct callchain_list *clist;
+
+	/*
+	 * adding and deleting hist entries must be done outside of this
+	 * function since TEST_ASSERT_VAL() returns in case of failure.
+	 */
+	hists__collapse_resort(hists, NULL);
+	hists__output_resort(hists);
+
+	if (verbose > 2) {
+		pr_info("use callchain: %d, cumulate callchain: %d\n",
+			symbol_conf.use_callchain,
+			symbol_conf.cumulate_callchain);
+		print_hists_out(hists);
+	}
+
+	root = &hists->entries;
+	for (node = rb_first(root), i = 0;
+	     node && (he = rb_entry(node, struct hist_entry, rb_node));
+	     node = rb_next(node), i++) {
+		scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+		TEST_ASSERT_VAL("Incorrect number of hist entry",
+				i < nr_expected);
+		TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+				!strcmp(COMM(he), expected[i].comm) &&
+				!strcmp(DSO(he), expected[i].dso) &&
+				!strcmp(SYM(he), expected[i].sym));
+
+		if (symbol_conf.cumulate_callchain)
+			TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+		if (!symbol_conf.use_callchain)
+			continue;
+
+		/* check callchain entries */
+		root = &he->callchain->node.rb_root;
+		cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
+
+		c = 0;
+		list_for_each_entry(clist, &cnode->val, list) {
+			scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+			TEST_ASSERT_VAL("Incorrect number of callchain entry",
+					c < expected_callchain[i].nr);
+			TEST_ASSERT_VAL(buf,
+				!strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+				!strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+			c++;
+		}
+		/* TODO: handle multiple child nodes properly */
+		TEST_ASSERT_VAL("Incorrect number of callchain entry",
+				c <= expected_callchain[i].nr);
+	}
+	TEST_ASSERT_VAL("Incorrect number of hist entry",
+			i == nr_expected);
+	TEST_ASSERT_VAL("Incorrect number of callchain entry",
+			!symbol_conf.use_callchain || nr_expected == nr_callchain);
+	return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%     bash  bash           [.] main
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     perf  libc           [.] free
+	 *   10.00%     perf  libc           [.] malloc
+	 *   10.00%     perf  perf           [.] cmd_record
+	 */
+	struct result expected[] = {
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+
+	symbol_conf.use_callchain = false;
+	symbol_conf.cumulate_callchain = false;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+	struct callchain_result expected_callchain[] = {
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;
+	symbol_conf.cumulate_callchain = false;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 */
+	struct result expected[] = {
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+	};
+
+	symbol_conf.use_callchain = false;
+	symbol_conf.cumulate_callchain = true;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* callchain + children */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = &evsel->hists;
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *              |
+	 *              --- run_command
+	 *                  main
+	 *
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                 |
+	 *                 |--50.00%-- xmalloc
+	 *                 |           main
+	 *                  --50.00%-- main
+	 *
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 *              |
+	 *              --- sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+	};
+	struct callchain_result expected_callchain[] = {
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			2, {	{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;
+	symbol_conf.cumulate_callchain = true;
+
+	setup_sorting();
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+int test__hists_cumulate(void)
+{
+	int err = TEST_FAIL;
+	struct machines machines;
+	struct machine *machine;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new();
+	size_t i;
+	test_fn_t testcases[] = {
+		test1,
+		test2,
+		test3,
+		test4,
+	};
+
+	TEST_ASSERT_VAL("No memory", evlist);
+
+	err = parse_events(evlist, "cpu-clock");
+	if (err)
+		goto out;
+
+	machines__init(&machines);
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	evsel = perf_evlist__first(evlist);
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		err = testcases[i](evsel, machine);
+		if (err < 0)
+			break;
+	}
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	machines__exit(&machines);
+
+	return err;
+}
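
The expected "children" periods in test3/test4 follow directly from the ten
fake callchains, each carrying period 1000: an entry's cumulative (children)
period counts every sample whose callchain contains that symbol, while self
still counts only samples whose ip lands in it.  For instance:

    perf/main:        on all 7 perf callchains  -> children = 7 * 1000 = 7000 (70.00%)
                      2 samples hit main itself -> self     = 2 * 1000 = 2000 (20.00%)
    perf/run_command: on 5 callchains, never the sampled ip -> 5000 / 0
    bash/libc/malloc: on 2 bash callchains -> 2000 / 0 (malloc appears twice in
                      the xmalloc cycle but still counts once per sample)
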
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index c5ba924..821f581 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -21,33 +21,33 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .pid = 100, .ip = 0xf0000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .pid = 100, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [libc]   malloc() */
-	{ .pid = 100, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 	/* perf [perf]   main() */
-	{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
 	/* perf [perf]   cmd_record() */
-	{ .pid = 200, .ip = 0x40000 + 900, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* perf [kernel] page_fault() */
-	{ .pid = 200, .ip = 0xf0000 + 800, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	/* bash [bash]   main() */
-	{ .pid = 300, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
 	/* bash [bash]   xmalloc() */
-	{ .pid = 300, .ip = 0x40000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [libc]   malloc() */
-	{ .pid = 300, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
 	/* bash [kernel] page_fault() */
-	{ .pid = 300, .ip = 0xf0000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
-static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+static int add_hist_entries(struct perf_evlist *evlist,
+			    struct machine *machine __maybe_unused)
 {
 	struct perf_evsel *evsel;
 	struct addr_location al;
-	struct hist_entry *he;
-	struct perf_sample sample = { .cpu = 0, };
+	struct perf_sample sample = { .period = 100, };
 	size_t i;
 
 	/*
@@ -62,6 +62,10 @@
 					.misc = PERF_RECORD_MISC_USER,
 				},
 			};
+			struct hist_entry_iter iter = {
+				.ops = &hist_iter_normal,
+				.hide_unresolved = false,
+			};
 
 			/* make sure it has no filter at first */
 			evsel->hists.thread_filter = NULL;
@@ -76,18 +80,13 @@
 							  &sample) < 0)
 				goto out;
 
-			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 100, 1, 0);
-			if (he == NULL)
+			if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+						 PERF_MAX_STACK_DEPTH, NULL) < 0)
 				goto out;
 
 			fake_samples[i].thread = al.thread;
 			fake_samples[i].map = al.map;
 			fake_samples[i].sym = al.sym;
-
-			hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
-			if (!he->filtered)
-				he->hists->stats.nr_non_filtered_samples++;
 		}
 	}
 
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 5ffa2c3..d4b34b0 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -21,41 +21,41 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_common_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .pid = 100, .ip = 0xf0000 + 700, },
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .pid = 200, .ip = 0x40000 + 700, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [perf]   cmd_record() */
-	{ .pid = 200, .ip = 0x40000 + 900, },
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* bash [bash]   xmalloc() */
-	{ .pid = 300, .ip = 0x40000 + 800, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [libc]   malloc() */
-	{ .pid = 300, .ip = 0x50000 + 700, },
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
 };
 
 static struct sample fake_samples[][5] = {
 	{
 		/* perf [perf]   run_command() */
-		{ .pid = 100, .ip = 0x40000 + 800, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
 		/* perf [libc]   malloc() */
-		{ .pid = 100, .ip = 0x50000 + 700, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 		/* perf [kernel] page_fault() */
-		{ .pid = 100, .ip = 0xf0000 + 800, },
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 		/* perf [kernel] sys_perf_event_open() */
-		{ .pid = 200, .ip = 0xf0000 + 900, },
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
 		/* bash [libc]   free() */
-		{ .pid = 300, .ip = 0x50000 + 800, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_FREE, },
 	},
 	{
 		/* perf [libc]   free() */
-		{ .pid = 200, .ip = 0x50000 + 800, },
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
 		/* bash [libc]   malloc() */
-		{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
 		/* bash [bash]   xfree() */
-		{ .pid = 300, .ip = 0x40000 + 900, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XFREE, },
 		/* bash [libc]   realloc() */
-		{ .pid = 300, .ip = 0x50000 + 900, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_REALLOC, },
 		/* bash [kernel] page_fault() */
-		{ .pid = 300, .ip = 0xf0000 + 800, },
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	},
 };
 
@@ -64,7 +64,7 @@
 	struct perf_evsel *evsel;
 	struct addr_location al;
 	struct hist_entry *he;
-	struct perf_sample sample = { .cpu = 0, };
+	struct perf_sample sample = { .period = 1, };
 	size_t i = 0, k;
 
 	/*
@@ -88,7 +88,7 @@
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 1, 1, 0);
+						NULL, NULL, 1, 1, 0, true);
 			if (he == NULL)
 				goto out;
 
@@ -112,7 +112,7 @@
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL,
-						NULL, NULL, 1, 1, 0);
+						NULL, NULL, 1, 1, 0, true);
 			if (he == NULL)
 				goto out;
 
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index a168505..e3bbd6c 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -22,31 +22,31 @@
 /* For the numbers, see hists_common.c */
 static struct sample fake_samples[] = {
 	/* perf [kernel] schedule() */
-	{ .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, },
+	{ .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
 	/* perf [perf]   main() */
-	{ .cpu = 1, .pid = 100, .ip = 0x40000 + 700, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [perf]   cmd_record() */
-	{ .cpu = 1, .pid = 100, .ip = 0x40000 + 900, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
 	/* perf [libc]   malloc() */
-	{ .cpu = 1, .pid = 100, .ip = 0x50000 + 700, },
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
 	/* perf [libc]   free() */
-	{ .cpu = 2, .pid = 100, .ip = 0x50000 + 800, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
 	/* perf [perf]   main() */
-	{ .cpu = 2, .pid = 200, .ip = 0x40000 + 700, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
 	/* perf [kernel] page_fault() */
-	{ .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, },
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 	/* bash [bash]   main() */
-	{ .cpu = 3, .pid = 300, .ip = 0x40000 + 700, },
+	{ .cpu = 3, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
 	/* bash [bash]   xmalloc() */
-	{ .cpu = 0, .pid = 300, .ip = 0x40000 + 800, },
+	{ .cpu = 0, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
 	/* bash [kernel] page_fault() */
-	{ .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, },
+	{ .cpu = 1, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
 };
 
 static int add_hist_entries(struct hists *hists, struct machine *machine)
 {
 	struct addr_location al;
-	struct hist_entry *he;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
 	struct perf_sample sample = { .period = 100, };
 	size_t i;
 
@@ -56,6 +56,10 @@
 				.misc = PERF_RECORD_MISC_USER,
 			},
 		};
+		struct hist_entry_iter iter = {
+			.ops = &hist_iter_normal,
+			.hide_unresolved = false,
+		};
 
 		sample.cpu = fake_samples[i].cpu;
 		sample.pid = fake_samples[i].pid;
@@ -66,9 +70,8 @@
 						  &sample) < 0)
 			goto out;
 
-		he = __hists__add_entry(hists, &al, NULL, NULL, NULL,
-					sample.period, 1, 0);
-		if (he == NULL)
+		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+					 PERF_MAX_STACK_DEPTH, NULL) < 0)
 			goto out;
 
 		fake_samples[i].thread = al.thread;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d76c0e2..022bb68 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -45,6 +45,7 @@
 int test__mmap_thread_lookup(void);
 int test__thread_mg_share(void);
 int test__hists_output(void);
+int test__hists_cumulate(void);
 
 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index d11541d..3ccf6e1 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -194,7 +194,7 @@
 		ui_helpline__vpush(format, args);
 		va_end(args);
 	} else {
-		while ((key == ui__question_window("Warning!", text,
+		while ((key = ui__question_window("Warning!", text,
 						   "Press any key...",
 						   timeout)) == K_RESIZE)
 			ui_browser__handle_resize(browser);
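
The one-character fix above turns a comparison back into an assignment: with '==', the uninitialized 'key' was compared against the dialog's return value, that 0/1 result was compared with K_RESIZE, and the return value itself was discarded. A standalone illustration (get_key() and the constant are stand-ins, not perf symbols):

	#include <stdio.h>

	#define K_RESIZE 1

	static int calls;

	/* stand-in for ui__question_window(): returns K_RESIZE once, then 0 */
	static int get_key(void)
	{
		return calls++ ? 0 : K_RESIZE;
	}

	int main(void)
	{
		int key;

		/*
		 * Buggy form (what the removed line compiled to): compare the
		 * uninitialized 'key' with the return value, compare that 0/1
		 * result against K_RESIZE, and throw the return value away:
		 *
		 *	while ((key == get_key()) == K_RESIZE)
		 *		;
		 */

		/* Fixed form: assign first, then compare what was assigned. */
		while ((key = get_key()) == K_RESIZE)
			;

		printf("key = %d\n", key);	/* prints 0 */
		return 0;
	}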
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 1c331b9..52c03fb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -37,7 +37,6 @@
 static void hist_browser__update_nr_entries(struct hist_browser *hb);
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-					     struct hists *hists,
 					     float min_pcnt);
 
 static bool hist_browser__has_filter(struct hist_browser *hb)
@@ -319,7 +318,7 @@
 	struct hists *hists = browser->hists;
 
 	for (nd = rb_first(&hists->entries);
-	     (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL;
+	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
 	     nd = rb_next(nd)) {
 		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
 		hist_entry__set_folding(he, unfold);
@@ -651,13 +650,36 @@
 			  __hpp__slsmg_color_printf, true);		\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)			\
+static u64 __hpp_get_acc_##_field(struct hist_entry *he)		\
+{									\
+	return he->stat_acc->_field;					\
+}									\
+									\
+static int								\
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
+				struct perf_hpp *hpp,			\
+				struct hist_entry *he)			\
+{									\
+	if (!symbol_conf.cumulate_callchain) {				\
+		int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A");	\
+		slsmg_printf("%s", hpp->buf);				\
+									\
+		return ret;						\
+	}								\
+	return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%",	\
+			  __hpp__slsmg_color_printf, true);		\
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
 
 void hist_browser__init_hpp(void)
 {
@@ -671,6 +693,8 @@
 				hist_browser__hpp_color_overhead_guest_sys;
 	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
 				hist_browser__hpp_color_overhead_guest_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+				hist_browser__hpp_color_overhead_acc;
 }
 
 static int hist_browser__show_entry(struct hist_browser *browser,
@@ -783,15 +807,12 @@
 
 	for (nd = browser->top; nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(h->hists);
-		float percent = 0.0;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
-		if (total)
-			percent = h->stat.period * 100.0 / total;
-
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < hb->min_pcnt)
 			continue;
 
@@ -804,16 +825,11 @@
 }
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
-					     struct hists *hists,
 					     float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(hists);
-		float percent = 0.0;
-
-		if (total)
-			percent = h->stat.period * 100.0 / total;
+		float percent = hist_entry__get_percent_limit(h);
 
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
@@ -825,16 +841,11 @@
 }
 
 static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
-						  struct hists *hists,
 						  float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		u64 total = hists__total_period(hists);
-		float percent = 0.0;
-
-		if (total)
-			percent = h->stat.period * 100.0 / total;
+		float percent = hist_entry__get_percent_limit(h);
 
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
@@ -863,14 +874,14 @@
 	switch (whence) {
 	case SEEK_SET:
 		nd = hists__filter_entries(rb_first(browser->entries),
-					   hb->hists, hb->min_pcnt);
+					   hb->min_pcnt);
 		break;
 	case SEEK_CUR:
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
 		nd = hists__filter_prev_entries(rb_last(browser->entries),
-						hb->hists, hb->min_pcnt);
+						hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -913,8 +924,7 @@
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd), hb->hists,
-						   hb->min_pcnt);
+			nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -947,7 +957,7 @@
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
+			nd = hists__filter_prev_entries(rb_prev(nd),
 							hb->min_pcnt);
 			if (nd == NULL)
 				break;
@@ -1126,7 +1136,6 @@
 static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 {
 	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
-						   browser->hists,
 						   browser->min_pcnt);
 	int printed = 0;
 
@@ -1134,8 +1143,7 @@
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
 		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd), browser->hists,
-					   browser->min_pcnt);
+		nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
 	}
 
 	return printed;
@@ -1372,8 +1380,7 @@
 		return;
 	}
 
-	while ((nd = hists__filter_entries(nd, hb->hists,
-					   hb->min_pcnt)) != NULL) {
+	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		nr_entries++;
 		nd = rb_next(nd);
 	}
@@ -1699,14 +1706,14 @@
 zoom_out_dso:
 				ui_helpline__pop();
 				browser->hists->dso_filter = NULL;
-				sort_dso.elide = false;
+				perf_hpp__set_elide(HISTC_DSO, false);
 			} else {
 				if (dso == NULL)
 					continue;
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
 						   dso->kernel ? "the Kernel" : dso->short_name);
 				browser->hists->dso_filter = dso;
-				sort_dso.elide = true;
+				perf_hpp__set_elide(HISTC_DSO, true);
 				pstack__push(fstack, &browser->hists->dso_filter);
 			}
 			hists__filter_by_dso(hists);
@@ -1718,13 +1725,13 @@
 zoom_out_thread:
 				ui_helpline__pop();
 				browser->hists->thread_filter = NULL;
-				sort_thread.elide = false;
+				perf_hpp__set_elide(HISTC_THREAD, false);
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
 				browser->hists->thread_filter = thread;
-				sort_thread.elide = true;
+				perf_hpp__set_elide(HISTC_THREAD, true);
 				pstack__push(fstack, &browser->hists->thread_filter);
 			}
 			hists__filter_by_thread(hists);
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 9d90683..6ca60e4 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -47,11 +47,26 @@
 			  __percent_color_snprintf, true);			\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+				       struct perf_hpp *hpp,			\
+				       struct hist_entry *he)			\
+{										\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",		\
+			      __percent_color_snprintf, true);			\
+}
+
 __HPP_COLOR_PERCENT_FN(overhead, period)
 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
 __HPP_COLOR_PERCENT_FN(overhead_us, period_us)
 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
 
 #undef __HPP_COLOR_PERCENT_FN
 
@@ -68,6 +83,8 @@
 				perf_gtk__hpp_color_overhead_guest_sys;
 	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
 				perf_gtk__hpp_color_overhead_guest_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+				perf_gtk__hpp_color_overhead_acc;
 }
 
 static void callchain_list__sym_name(struct callchain_list *cl,
@@ -181,6 +198,13 @@
 		if (perf_hpp__should_skip(fmt))
 			continue;
 
+		/*
+		 * XXX no way to determine where the symbol column is..
+		 *     Just use last column for now.
+		 */
+		if (perf_hpp__is_sort_entry(fmt))
+			sym_col = col_idx;
+
 		fmt->header(fmt, &hpp, hists_to_evsel(hists));
 
 		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
@@ -209,14 +233,12 @@
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		GtkTreeIter iter;
 		u64 total = hists__total_period(h->hists);
-		float percent = 0.0;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
-		if (total)
-			percent = h->stat.period * 100.0 / total;
-
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < min_pcnt)
 			continue;
 
@@ -238,7 +260,8 @@
 
 		if (symbol_conf.use_callchain && sort__has_sym) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
-				total = h->stat.period;
+				total = symbol_conf.cumulate_callchain ?
+					h->stat_acc->period : h->stat.period;
 
 			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
 						sym_col, total);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4484f5b..498adb2 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -104,6 +104,18 @@
 	return ret;
 }
 
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+		   hpp_field_fn get_field, const char *fmt,
+		   hpp_snprint_fn print_fn, bool fmt_percent)
+{
+	if (!symbol_conf.cumulate_callchain) {
+		return snprintf(hpp->buf, hpp->size, "%*s",
+				fmt_percent ? 8 : 12, "N/A");
+	}
+
+	return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
+}
+
 static int field_cmp(u64 field_a, u64 field_b)
 {
 	if (field_a > field_b)
@@ -160,6 +172,24 @@
 	return ret;
 }
 
+static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
+			   hpp_field_fn get_field)
+{
+	s64 ret = 0;
+
+	if (symbol_conf.cumulate_callchain) {
+		/*
+		 * Put caller above callee when they have equal period.
+		 */
+		ret = field_cmp(get_field(a), get_field(b));
+		if (ret)
+			return ret;
+
+		ret = b->callchain->max_depth - a->callchain->max_depth;
+	}
+	return ret;
+}
+
 #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) 		\
 static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
 			       struct perf_hpp *hpp,			\
@@ -242,6 +272,34 @@
 	return __hpp__sort(a, b, he_get_##_field);				\
 }
 
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%",		\
+			      hpp_color_scnprintf, true);			\
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";		\
+	return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt,		\
+			      hpp_entry_scnprintf, true);			\
+}
+
+#define __HPP_SORT_ACC_FN(_type, _field)					\
+static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b)	\
+{										\
+	return __hpp__sort_acc(a, b, he_get_acc_##_field);			\
+}
+
 #define __HPP_ENTRY_RAW_FN(_type, _field)					\
 static u64 he_get_raw_##_field(struct hist_entry *he)				\
 {										\
@@ -270,18 +328,27 @@
 __HPP_ENTRY_PERCENT_FN(_type, _field)					\
 __HPP_SORT_FN(_type, _field)
 
+#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
+__HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
+__HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+__HPP_SORT_ACC_FN(_type, _field)
+
 #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width)	\
 __HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
 __HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
 __HPP_ENTRY_RAW_FN(_type, _field)					\
 __HPP_SORT_RAW_FN(_type, _field)
 
+__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
 
 HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
 HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
 HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
 HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
 HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
+HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
 
 HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
 HPP_RAW_FNS(period, "Period", period, 12, 12)
@@ -303,6 +370,17 @@
 		.sort	= hpp__sort_ ## _name,		\
 	}
 
+#define HPP__COLOR_ACC_PRINT_FNS(_name)			\
+	{						\
+		.header	= hpp__header_ ## _name,	\
+		.width	= hpp__width_ ## _name,		\
+		.color	= hpp__color_ ## _name,		\
+		.entry	= hpp__entry_ ## _name,		\
+		.cmp	= hpp__nop_cmp,			\
+		.collapse = hpp__nop_cmp,		\
+		.sort	= hpp__sort_ ## _name,		\
+	}
+
 #define HPP__PRINT_FNS(_name)				\
 	{						\
 		.header	= hpp__header_ ## _name,	\
@@ -319,6 +397,7 @@
 	HPP__COLOR_PRINT_FNS(overhead_us),
 	HPP__COLOR_PRINT_FNS(overhead_guest_sys),
 	HPP__COLOR_PRINT_FNS(overhead_guest_us),
+	HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
 	HPP__PRINT_FNS(samples),
 	HPP__PRINT_FNS(period)
 };
@@ -328,16 +407,23 @@
 
 
 #undef HPP__COLOR_PRINT_FNS
+#undef HPP__COLOR_ACC_PRINT_FNS
 #undef HPP__PRINT_FNS
 
 #undef HPP_PERCENT_FNS
+#undef HPP_PERCENT_ACC_FNS
 #undef HPP_RAW_FNS
 
 #undef __HPP_HEADER_FN
 #undef __HPP_WIDTH_FN
 #undef __HPP_COLOR_PERCENT_FN
 #undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+#undef __HPP_ENTRY_ACC_PERCENT_FN
 #undef __HPP_ENTRY_RAW_FN
+#undef __HPP_SORT_FN
+#undef __HPP_SORT_ACC_FN
+#undef __HPP_SORT_RAW_FN
 
 
 void perf_hpp__init(void)
@@ -361,6 +447,13 @@
 	if (field_order)
 		return;
 
+	if (symbol_conf.cumulate_callchain) {
+		perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
+
+		perf_hpp__format[PERF_HPP__OVERHEAD].header =
+						hpp__header_overhead_self;
+	}
+
 	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 
 	if (symbol_conf.show_cpu_utilization) {
@@ -383,6 +476,12 @@
 	list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
 	if (list_empty(list))
 		list_add(list, &perf_hpp__sort_list);
+
+	if (symbol_conf.cumulate_callchain) {
+		list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
+		if (list_empty(list))
+			list_add(list, &perf_hpp__sort_list);
+	}
 }
 
 void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -390,6 +489,11 @@
 	list_add_tail(&format->list, &perf_hpp__list);
 }
 
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+	list_del(&format->list);
+}
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
 {
 	list_add_tail(&format->sort_list, &perf_hpp__sort_list);
@@ -401,6 +505,21 @@
 	perf_hpp__column_register(&perf_hpp__format[col]);
 }
 
+void perf_hpp__column_disable(unsigned col)
+{
+	BUG_ON(col >= PERF_HPP__MAX_INDEX);
+	perf_hpp__column_unregister(&perf_hpp__format[col]);
+}
+
+void perf_hpp__cancel_cumulate(void)
+{
+	if (field_order)
+		return;
+
+	perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
+	perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
+}
+
 void perf_hpp__setup_output_field(void)
 {
 	struct perf_hpp_fmt *fmt;
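
The new OVERHEAD_ACC ("Children") column prints from he->stat_acc->period, i.e. an entry's own samples plus those of everything it called, while the plain overhead column (relabeled "Self" when cumulation is on) keeps printing he->stat.period. A toy calculation with made-up sample counts shows the difference:

	#include <stdio.h>

	int main(void)
	{
		/* 10 samples total: 7 land in bar(), 2 in foo(), 1 in main(),
		 * with a call graph of main() -> foo() -> bar() */
		double total = 10.0;
		double self_main = 1, self_foo = 2, self_bar = 7;

		/* accumulated period = own samples + all callees' samples */
		double acc_main = self_main + self_foo + self_bar;	/* 10 */
		double acc_foo  = self_foo + self_bar;			/*  9 */
		double acc_bar  = self_bar;				/*  7 */

		printf("main: Self %6.2f%%  Children %6.2f%%\n",
		       self_main * 100.0 / total, acc_main * 100.0 / total);
		printf("foo:  Self %6.2f%%  Children %6.2f%%\n",
		       self_foo * 100.0 / total, acc_foo * 100.0 / total);
		printf("bar:  Self %6.2f%%  Children %6.2f%%\n",
		       self_bar * 100.0 / total, acc_bar * 100.0 / total);
		return 0;
	}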
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 9f57991..90122ab 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -271,7 +271,9 @@
 {
 	switch (callchain_param.mode) {
 	case CHAIN_GRAPH_REL:
-		return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period,
+		return callchain__fprintf_graph(fp, &he->sorted_chain,
+						symbol_conf.cumulate_callchain ?
+						he->stat_acc->period : he->stat.period,
 						left_margin);
 		break;
 	case CHAIN_GRAPH_ABS:
@@ -461,12 +463,12 @@
 
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		float percent = h->stat.period * 100.0 /
-					hists->stats.total_period;
+		float percent;
 
 		if (h->filtered)
 			continue;
 
+		percent = hist_entry__get_percent_limit(h);
 		if (percent < min_pcnt)
 			continue;
 
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9a42382..48b6d3f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -616,7 +616,8 @@
 	if (sample->callchain == NULL)
 		return 0;
 
-	if (symbol_conf.use_callchain || sort__has_parent) {
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+	    sort__has_parent) {
 		return machine__resolve_callchain(al->machine, evsel, al->thread,
 						  sample, parent, al, max_stack);
 	}
@@ -629,3 +630,45 @@
 		return 0;
 	return callchain_append(he->callchain, &callchain_cursor, sample->period);
 }
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved)
+{
+	al->map = node->map;
+	al->sym = node->sym;
+	if (node->map)
+		al->addr = node->map->map_ip(node->map, node->ip);
+	else
+		al->addr = node->ip;
+
+	if (al->sym == NULL) {
+		if (hide_unresolved)
+			return 0;
+		if (al->map == NULL)
+			goto out;
+	}
+
+	if (al->map->groups == &al->machine->kmaps) {
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_KERNEL;
+			al->level = 'k';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+			al->level = 'g';
+		}
+	} else {
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_USER;
+			al->level = '.';
+		} else if (perf_guest) {
+			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+			al->level = 'u';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+			al->level = 'H';
+		}
+	}
+
+out:
+	return 1;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index bde2b0c..8f84423 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -162,7 +162,18 @@
 			      struct perf_evsel *evsel, struct addr_location *al,
 			      int max_stack);
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved);
 
 extern const char record_callchain_help[];
 int parse_callchain_report_opt(const char *arg);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+					     struct callchain_cursor *src)
+{
+	*dest = *src;
+
+	dest->first = src->curr;
+	dest->nr -= src->pos;
+}
 #endif	/* __PERF_CALLCHAIN_H */
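
callchain_cursor_snapshot() gives the cumulate code a window onto the unread tail of a cursor: the copy's 'first' is rebased to the source's 'curr', and 'nr' shrinks by the nodes already consumed, so appending from the snapshot sees only what the source has not yet walked past. A standalone analogue with hypothetical types (the real cursor holds callchain nodes, not ints):

	#include <stdio.h>

	struct cursor {
		const int *first;	/* where iteration (re)starts */
		size_t nr;		/* nodes visible from 'first' */
		const int *curr;	/* next node to be read */
		size_t pos;		/* nodes already consumed */
	};

	static void cursor_snapshot(struct cursor *dest, const struct cursor *src)
	{
		*dest = *src;
		dest->first = src->curr;	/* start at the first unread node */
		dest->nr -= src->pos;		/* drop what was already consumed */
	}

	int main(void)
	{
		int data[] = { 1, 2, 3, 4, 5 };
		struct cursor src = { data, 5, data + 2, 2 };	/* 2 consumed */
		struct cursor snap;

		cursor_snapshot(&snap, &src);
		printf("snapshot sees %zu nodes starting at %d\n",
		       snap.nr, *snap.first);	/* 3 nodes starting at 3 */
		return 0;
	}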
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b262b44..5a0a4b2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -4,6 +4,7 @@
 #include "session.h"
 #include "sort.h"
 #include "evsel.h"
+#include "annotate.h"
 #include <math.h>
 
 static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -231,6 +232,8 @@
 		return true;
 
 	he_stat__decay(&he->stat);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__decay(he->stat_acc);
 
 	diff = prev_period - he->stat.period;
 
@@ -276,14 +279,31 @@
  * histogram, sorted on item, collects periods
  */
 
-static struct hist_entry *hist_entry__new(struct hist_entry *template)
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+					  bool sample_self)
 {
-	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
-	struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
+	size_t callchain_size = 0;
+	struct hist_entry *he;
+
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
+		callchain_size = sizeof(struct callchain_root);
+
+	he = zalloc(sizeof(*he) + callchain_size);
 
 	if (he != NULL) {
 		*he = *template;
 
+		if (symbol_conf.cumulate_callchain) {
+			he->stat_acc = malloc(sizeof(he->stat));
+			if (he->stat_acc == NULL) {
+				free(he);
+				return NULL;
+			}
+			memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+			if (!sample_self)
+				memset(&he->stat, 0, sizeof(he->stat));
+		}
+
 		if (he->ms.map)
 			he->ms.map->referenced = true;
 
@@ -295,6 +315,7 @@
 			 */
 			he->branch_info = malloc(sizeof(*he->branch_info));
 			if (he->branch_info == NULL) {
+				free(he->stat_acc);
 				free(he);
 				return NULL;
 			}
@@ -333,7 +354,8 @@
 
 static struct hist_entry *add_hist_entry(struct hists *hists,
 					 struct hist_entry *entry,
-					 struct addr_location *al)
+					 struct addr_location *al,
+					 bool sample_self)
 {
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
@@ -357,7 +379,10 @@
 		cmp = hist_entry__cmp(he, entry);
 
 		if (!cmp) {
-			he_stat__add_period(&he->stat, period, weight);
+			if (sample_self)
+				he_stat__add_period(&he->stat, period, weight);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_period(he->stat_acc, period, weight);
 
 			/*
 			 * This mem info was allocated from sample__resolve_mem
@@ -385,14 +410,17 @@
 			p = &(*p)->rb_right;
 	}
 
-	he = hist_entry__new(entry);
+	he = hist_entry__new(entry, sample_self);
 	if (!he)
 		return NULL;
 
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
-	he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (sample_self)
+		he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
 	return he;
 }
 
@@ -401,7 +429,8 @@
 				      struct symbol *sym_parent,
 				      struct branch_info *bi,
 				      struct mem_info *mi,
-				      u64 period, u64 weight, u64 transaction)
+				      u64 period, u64 weight, u64 transaction,
+				      bool sample_self)
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
@@ -426,7 +455,429 @@
 		.transaction = transaction,
 	};
 
-	return add_hist_entry(hists, &entry, al);
+	return add_hist_entry(hists, &entry, al, sample_self);
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+		    struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+			struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_sample *sample = iter->sample;
+	struct mem_info *mi;
+
+	mi = sample__resolve_mem(sample, al);
+	if (mi == NULL)
+		return -ENOMEM;
+
+	iter->priv = mi;
+	return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	u64 cost;
+	struct mem_info *mi = iter->priv;
+	struct hist_entry *he;
+
+	if (mi == NULL)
+		return -EINVAL;
+
+	cost = iter->sample->weight;
+	if (!cost)
+		cost = 1;
+
+	/*
+	 * must pass period=weight in order to get the correct
+	 * sorting from hists__collapse_resort() which is solely
+	 * based on periods. We want sorting to be done on nr_events * weight
+	 * and this is indirectly achieved by passing period=weight here
+	 * and the he_stat__add_period() function.
+	 */
+	he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
+				cost, cost, 0, true);
+	if (!he)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+		      struct addr_location *al __maybe_unused)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct hist_entry *he = iter->he;
+	int err = -EINVAL;
+
+	if (he == NULL)
+		goto out;
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+	/*
+	 * We don't need to free iter->priv (mem_info) here since
+	 * the mem info was either already freed in add_hist_entry() or
+	 * passed to a new hist entry by hist_entry__new().
+	 */
+	iter->priv = NULL;
+
+	iter->he = NULL;
+	return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return -ENOMEM;
+
+	iter->curr = 0;
+	iter->total = sample->branch_stack->nr;
+
+	iter->priv = bi;
+	return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	/* to avoid calling callback function */
+	iter->he = NULL;
+
+	return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi = iter->priv;
+	int i = iter->curr;
+
+	if (bi == NULL)
+		return 0;
+
+	if (iter->curr >= iter->total)
+		return 0;
+
+	al->map = bi[i].to.map;
+	al->sym = bi[i].to.sym;
+	al->addr = bi[i].to.addr;
+	return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_evsel *evsel = iter->evsel;
+	struct hist_entry *he = NULL;
+	int i = iter->curr;
+	int err = 0;
+
+	bi = iter->priv;
+
+	if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+		goto out;
+
+	/*
+	 * The report shows the percentage of total branches captured
+	 * and not events sampled. Thus we use a pseudo period of 1.
+	 */
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
+				1, 1, 0, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+out:
+	iter->he = he;
+	iter->curr++;
+	return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+			  struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he;
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+
+	if (he == NULL)
+		return 0;
+
+	iter->he = NULL;
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+			      struct addr_location *al __maybe_unused)
+{
+	struct hist_entry **he_cache;
+
+	callchain_cursor_commit(&callchain_cursor);
+
+	/*
+	 * This is for detecting cycles or recursion so that each entry
+	 * is accumulated only once, keeping any one entry from exceeding
+	 * 100% overhead.
+	 */
+	he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
+	if (he_cache == NULL)
+		return -ENOMEM;
+
+	iter->priv = he_cache;
+	iter->curr = 0;
+
+	return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+				 struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	int err = 0;
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;
+
+	callchain_append(he->callchain, &callchain_cursor, sample->period);
+
+	/*
+	 * We need to re-initialize the cursor since callchain_append()
+	 * advanced the cursor to the end.
+	 */
+	callchain_cursor_commit(&callchain_cursor);
+
+	hists__inc_nr_samples(&evsel->hists, he->filtered);
+
+	return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+			   struct addr_location *al)
+{
+	struct callchain_cursor_node *node;
+
+	node = callchain_cursor_current(&callchain_cursor);
+	if (node == NULL)
+		return 0;
+
+	return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+			       struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	struct hist_entry he_tmp = {
+		.cpu = al->cpu,
+		.thread = al->thread,
+		.comm = thread__comm(al->thread),
+		.ip = al->addr,
+		.ms = {
+			.map = al->map,
+			.sym = al->sym,
+		},
+		.parent = iter->parent,
+	};
+	int i;
+	struct callchain_cursor cursor;
+
+	callchain_cursor_snapshot(&cursor, &callchain_cursor);
+
+	callchain_cursor_advance(&callchain_cursor);
+
+	/*
+	 * Check if there are duplicate entries in the callchain.
+	 * It's possible that it has cycles or recursive calls.
+	 */
+	for (i = 0; i < iter->curr; i++) {
+		if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+			/* to avoid calling callback function */
+			iter->he = NULL;
+			return 0;
+		}
+	}
+
+	he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction, false);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;
+
+	callchain_append(he->callchain, &cursor, sample->period);
+	return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+	.prepare_entry 		= iter_prepare_mem_entry,
+	.add_single_entry 	= iter_add_single_mem_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+	.prepare_entry 		= iter_prepare_branch_entry,
+	.add_single_entry 	= iter_add_single_branch_entry,
+	.next_entry 		= iter_next_branch_entry,
+	.add_next_entry 	= iter_add_next_branch_entry,
+	.finish_entry 		= iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+	.prepare_entry 		= iter_prepare_normal_entry,
+	.add_single_entry 	= iter_add_single_normal_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+	.prepare_entry 		= iter_prepare_cumulative_entry,
+	.add_single_entry 	= iter_add_single_cumulative_entry,
+	.next_entry 		= iter_next_cumulative_entry,
+	.add_next_entry 	= iter_add_next_cumulative_entry,
+	.finish_entry 		= iter_finish_cumulative_entry,
+};
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 struct perf_evsel *evsel, struct perf_sample *sample,
+			 int max_stack_depth, void *arg)
+{
+	int err, err2;
+
+	err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
+					max_stack_depth);
+	if (err)
+		return err;
+
+	iter->evsel = evsel;
+	iter->sample = sample;
+
+	err = iter->ops->prepare_entry(iter, al);
+	if (err)
+		goto out;
+
+	err = iter->ops->add_single_entry(iter, al);
+	if (err)
+		goto out;
+
+	if (iter->he && iter->add_entry_cb) {
+		err = iter->add_entry_cb(iter, al, true, arg);
+		if (err)
+			goto out;
+	}
+
+	while (iter->ops->next_entry(iter, al)) {
+		err = iter->ops->add_next_entry(iter, al);
+		if (err)
+			break;
+
+		if (iter->he && iter->add_entry_cb) {
+			err = iter->add_entry_cb(iter, al, false, arg);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	err2 = iter->ops->finish_entry(iter, al);
+	if (!err)
+		err = err2;
+
+	return err;
 }
 
 int64_t
@@ -469,6 +920,7 @@
 {
 	zfree(&he->branch_info);
 	zfree(&he->mem_info);
+	zfree(&he->stat_acc);
 	free_srcline(he->srcline);
 	free(he);
 }
@@ -494,6 +946,8 @@
 
 		if (!cmp) {
 			he_stat__add_stat(&iter->stat, &he->stat);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
 			if (symbol_conf.use_callchain) {
 				callchain_cursor_reset(&callchain_cursor);
@@ -800,6 +1254,13 @@
 	events_stats__inc(&hists->stats, type);
 }
 
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+	events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
+	if (!filtered)
+		hists->stats.nr_non_filtered_samples++;
+}
+
 static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 						 struct hist_entry *pair)
 {
@@ -831,7 +1292,7 @@
 			p = &(*p)->rb_right;
 	}
 
-	he = hist_entry__new(pair);
+	he = hist_entry__new(pair, true);
 	if (he) {
 		memset(&he->stat, 0, sizeof(he->stat));
 		he->hists = hists;
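
The heart of this patch is the hist_entry_iter state machine: one prepare step, one entry added for the sample itself, a loop adding further entries (branch targets for hist_iter_branch, resolved callers for hist_iter_cumulative), and an unconditional finish step for cleanup, with the optional add_entry_cb fired after each addition. A standalone analogue of that control flow, with hypothetical types standing in for the perf ones:

	#include <stdio.h>

	struct iter;

	struct iter_ops {
		int (*prepare)(struct iter *);
		int (*add_single)(struct iter *);
		int (*next)(struct iter *);	/* nonzero while entries remain */
		int (*add_next)(struct iter *);
		int (*finish)(struct iter *);
	};

	struct iter {
		const struct iter_ops *ops;
		int curr, total;
	};

	static int prepare(struct iter *it)    { it->curr = 0; return 0; }
	static int add_single(struct iter *it) { (void)it; puts("single"); return 0; }
	static int next_entry(struct iter *it) { return it->curr < it->total; }
	static int add_next(struct iter *it)   { printf("next %d\n", it->curr++); return 0; }
	static int finish(struct iter *it)     { (void)it; return 0; }

	static const struct iter_ops ops = {
		prepare, add_single, next_entry, add_next, finish,
	};

	/* Same shape as hist_entry_iter__add(): prepare, add an entry for
	 * the sample itself, walk the remaining entries, always clean up. */
	static int iter_add(struct iter *it)
	{
		int err, err2;

		err = it->ops->prepare(it);
		if (!err)
			err = it->ops->add_single(it);
		while (!err && it->ops->next(it))
			err = it->ops->add_next(it);
		err2 = it->ops->finish(it);
		return err ? err : err2;
	}

	int main(void)
	{
		struct iter it = { &ops, 0, 3 };
		return iter_add(&it);
	}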
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a8418d1..d2bf035 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -96,12 +96,50 @@
 	u16			col_len[HISTC_NR_COLS];
 };
 
+struct hist_entry_iter;
+
+struct hist_iter_ops {
+	int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+	int total;
+	int curr;
+
+	bool hide_unresolved;
+
+	struct perf_evsel *evsel;
+	struct perf_sample *sample;
+	struct hist_entry *he;
+	struct symbol *parent;
+	void *priv;
+
+	const struct hist_iter_ops *ops;
+	/* user-defined callback function (optional) */
+	int (*add_entry_cb)(struct hist_entry_iter *iter,
+			    struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
 struct hist_entry *__hists__add_entry(struct hists *hists,
 				      struct addr_location *al,
 				      struct symbol *parent,
 				      struct branch_info *bi,
 				      struct mem_info *mi, u64 period,
-				      u64 weight, u64 transaction);
+				      u64 weight, u64 transaction,
+				      bool sample_self);
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 struct perf_evsel *evsel, struct perf_sample *sample,
+			 int max_stack_depth, void *arg);
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
@@ -119,6 +157,7 @@
 void hists__reset_stats(struct hists *hists);
 void hists__inc_stats(struct hists *hists, struct hist_entry *h);
 void hists__inc_nr_events(struct hists *hists, u32 type);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
 void events_stats__inc(struct events_stats *stats, u32 type);
 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
@@ -166,6 +205,7 @@
 
 	struct list_head list;
 	struct list_head sort_list;
+	bool elide;
 };
 
 extern struct list_head perf_hpp__list;
@@ -192,6 +232,7 @@
 	PERF_HPP__OVERHEAD_US,
 	PERF_HPP__OVERHEAD_GUEST_SYS,
 	PERF_HPP__OVERHEAD_GUEST_US,
+	PERF_HPP__OVERHEAD_ACC,
 	PERF_HPP__SAMPLES,
 	PERF_HPP__PERIOD,
 
@@ -200,7 +241,11 @@
 
 void perf_hpp__init(void);
 void perf_hpp__column_register(struct perf_hpp_fmt *format);
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
 void perf_hpp__column_enable(unsigned col);
+void perf_hpp__column_disable(unsigned col);
+void perf_hpp__cancel_cumulate(void);
+
 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
 void perf_hpp__setup_output_field(void);
 void perf_hpp__reset_output_field(void);
@@ -208,7 +253,12 @@
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+{
+	return format->elide;
+}
+
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
@@ -218,6 +268,9 @@
 int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
 	       hpp_field_fn get_field, const char *fmt,
 	       hpp_snprint_fn print_fn, bool fmt_percent);
+int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
+		   hpp_field_fn get_field, const char *fmt,
+		   hpp_snprint_fn print_fn, bool fmt_percent);
 
 static inline void advance_hpp(struct perf_hpp *hpp, int inc)
 {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 901b9be..45512ba 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1061,6 +1061,7 @@
 	DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
 	DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
 	DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+	DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
 	DIM(PERF_HPP__SAMPLES, "sample"),
 	DIM(PERF_HPP__PERIOD, "period"),
 };
@@ -1156,6 +1157,7 @@
 
 	INIT_LIST_HEAD(&hse->hpp.list);
 	INIT_LIST_HEAD(&hse->hpp.sort_list);
+	hse->hpp.elide = false;
 
 	return hse;
 }
@@ -1363,27 +1365,64 @@
 	return ret;
 }
 
-bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+void perf_hpp__set_elide(int idx, bool elide)
 {
-	if (perf_hpp__is_sort_entry(format)) {
-		struct hpp_sort_entry *hse;
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
 
-		hse = container_of(format, struct hpp_sort_entry, hpp);
-		return hse->se->elide;
+	perf_hpp__for_each_format(fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_width_idx == idx) {
+			fmt->elide = elide;
+			break;
+		}
 	}
-	return false;
 }
 
-static void sort_entry__setup_elide(struct sort_entry *se,
-				    struct strlist *list,
-				    const char *list_name, FILE *fp)
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
 {
 	if (list && strlist__nr_entries(list) == 1) {
 		if (fp != NULL)
 			fprintf(fp, "# %s: %s\n", list_name,
 				strlist__entry(list, 0)->s);
-		se->elide = true;
+		return true;
 	}
+	return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+	switch (idx) {
+	case HISTC_SYMBOL:
+		return __get_elide(symbol_conf.sym_list, "symbol", output);
+	case HISTC_DSO:
+		return __get_elide(symbol_conf.dso_list, "dso", output);
+	case HISTC_COMM:
+		return __get_elide(symbol_conf.comm_list, "comm", output);
+	default:
+		break;
+	}
+
+	if (sort__mode != SORT_MODE__BRANCH)
+		return false;
+
+	switch (idx) {
+	case HISTC_SYMBOL_FROM:
+		return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+	case HISTC_SYMBOL_TO:
+		return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+	case HISTC_DSO_FROM:
+		return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+	case HISTC_DSO_TO:
+		return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+	default:
+		break;
+	}
+
+	return false;
 }
 
 void sort__setup_elide(FILE *output)
@@ -1391,39 +1430,12 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-				"dso", output);
-	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list,
-				"comm", output);
-	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list,
-				"symbol", output);
+	perf_hpp__for_each_format(fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
 
-	if (sort__mode == SORT_MODE__BRANCH) {
-		sort_entry__setup_elide(&sort_dso_from,
-					symbol_conf.dso_from_list,
-					"dso_from", output);
-		sort_entry__setup_elide(&sort_dso_to,
-					symbol_conf.dso_to_list,
-					"dso_to", output);
-		sort_entry__setup_elide(&sort_sym_from,
-					symbol_conf.sym_from_list,
-					"sym_from", output);
-		sort_entry__setup_elide(&sort_sym_to,
-					symbol_conf.sym_to_list,
-					"sym_to", output);
-	} else if (sort__mode == SORT_MODE__MEMORY) {
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"symbol_daddr", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"dso_daddr", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"mem", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"local_weight", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"tlb", output);
-		sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
-					"snoop", output);
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		fmt->elide = get_elide(hse->se->se_width_idx, output);
 	}
 
 	/*
@@ -1434,8 +1446,7 @@
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
-		hse = container_of(fmt, struct hpp_sort_entry, hpp);
-		if (!hse->se->elide)
+		if (!fmt->elide)
 			return;
 	}
 
@@ -1443,8 +1454,7 @@
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
-		hse = container_of(fmt, struct hpp_sort_entry, hpp);
-		hse->se->elide = false;
+		fmt->elide = false;
 	}
 }
 
@@ -1581,6 +1591,9 @@
 	sort__has_sym = 0;
 	sort__has_dso = 0;
 
+	field_order = NULL;
+	sort_order = NULL;
+
 	reset_dimensions();
 	perf_hpp__reset_output_field();
 }
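
The elide rework reduces the old per-sort-entry bookkeeping to one rule evaluated per column: if the filter list tied to a sort key (dso, comm, symbol, or the branch from/to variants under SORT_MODE__BRANCH) holds exactly one entry, the column is elided, since every visible row would print the identical value. The rule in isolation (hypothetical helper, not a perf function):

	#include <stdbool.h>
	#include <stdio.h>

	static bool should_elide(size_t nr_filter_entries)
	{
		/* one entry -> every row shows the same value -> hide it */
		return nr_filter_entries == 1;
	}

	int main(void)
	{
		printf("dso list with 1 entry:   elide=%d\n", should_elide(1));
		printf("dso list with 3 entries: elide=%d\n", should_elide(3));
		return 0;
	}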
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5f38d92..5bf0098 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -20,7 +20,7 @@
 
 #include "parse-options.h"
 #include "parse-events.h"
-
+#include "hist.h"
 #include "thread.h"
 
 extern regex_t parent_regex;
@@ -82,6 +82,7 @@
 		struct list_head head;
 	} pairs;
 	struct he_stat		stat;
+	struct he_stat		*stat_acc;
 	struct map_symbol	ms;
 	struct thread		*thread;
 	struct comm		*comm;
@@ -130,6 +131,21 @@
 	list_add_tail(&pair->pairs.node, &he->pairs.head);
 }
 
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+	u64 period = he->stat.period;
+	u64 total_period = hists__total_period(he->hists);
+
+	if (unlikely(total_period == 0))
+		return 0;
+
+	if (symbol_conf.cumulate_callchain)
+		period = he->stat_acc->period;
+
+	return period * 100.0 / total_period;
+}
+
+
 enum sort_mode {
 	SORT_MODE__NORMAL,
 	SORT_MODE__BRANCH,
@@ -186,7 +202,6 @@
 	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
 	u8	se_width_idx;
-	bool	elide;
 };
 
 extern struct sort_entry sort_thread;
@@ -197,6 +212,7 @@
 void reset_output_field(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
 
 int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 95e2497..7b9096f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -29,11 +29,12 @@
 char **vmlinux_path;
 
 struct symbol_conf symbol_conf = {
-	.use_modules	  = true,
-	.try_vmlinux_path = true,
-	.annotate_src	  = true,
-	.demangle	  = true,
-	.symfs            = "",
+	.use_modules		= true,
+	.try_vmlinux_path	= true,
+	.annotate_src		= true,
+	.demangle		= true,
+	.cumulate_callchain	= true,
+	.symfs			= "",
 };
 
 static enum dso_binary_type binary_type_symtab[] = {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 33ede53..615c752 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -109,6 +109,7 @@
 			show_nr_samples,
 			show_total_period,
 			use_callchain,
+			cumulate_callchain,
 			exclude_other,
 			show_cpu_utilization,
 			initialized,
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index b3dbe9e..54833a7 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -13,7 +13,7 @@
 
 export CC CFLAGS
 
-TARGETS = pmu copyloops mm
+TARGETS = pmu copyloops mm tm
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index e80c42a..8ebc58a 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -30,12 +30,15 @@
 
 	pid = fork();
 	if (pid == 0) {
+		setpgid(0, 0);
 		exit(test_function());
 	} else if (pid == -1) {
 		perror("fork");
 		return 1;
 	}
 
+	setpgid(pid, pid);
+
 	/* Wake us up in timeout seconds */
 	alarm(TIMEOUT);
 	terminated = false;
@@ -50,17 +53,20 @@
 
 		if (terminated) {
 			printf("!! force killing %s\n", name);
-			kill(pid, SIGKILL);
+			kill(-pid, SIGKILL);
 			return 1;
 		} else {
 			printf("!! killing %s\n", name);
-			kill(pid, SIGTERM);
+			kill(-pid, SIGTERM);
 			terminated = true;
 			alarm(KILL_TIMEOUT);
 			goto wait;
 		}
 	}
 
+	/* Kill anything else in the process group that is still running */
+	kill(-pid, SIGTERM);
+
 	if (WIFEXITED(status))
 		status = WEXITSTATUS(status);
 	else {
@@ -99,7 +105,10 @@
 
 	rc = run_test(test_function, name);
 
-	test_finish(name, rc);
+	if (rc == MAGIC_SKIP_RETURN_VALUE)
+		test_skip(name);
+	else
+		test_finish(name, rc);
 
 	return rc;
 }
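
The harness changes use the standard double-setpgid pattern: the child moves itself into a fresh process group and the parent issues the same setpgid() call, so whichever side runs first wins and there is no window in which a group-wide signal can miss; kill() with a negative pid then reaches every member of the group, including grandchildren a test may have forked. A minimal sketch of the same pattern:

	#include <signal.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			setpgid(0, 0);		/* child: new group, id == its pid */
			execlp("sleep", "sleep", "100", (char *)NULL);
			_exit(127);
		}
		setpgid(pid, pid);		/* parent: same call, closes the race */

		sleep(1);
		kill(-pid, SIGTERM);		/* negative pid == the whole group */
		waitpid(pid, NULL, 0);
		return 0;
	}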
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 7216f00..b9ff0db 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -4,7 +4,7 @@
 PROGS := count_instructions
 EXTRA_SOURCES := ../harness.c event.c
 
-all: $(PROGS)
+all: $(PROGS) sub_all
 
 $(PROGS): $(EXTRA_SOURCES)
 
@@ -12,12 +12,30 @@
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
 	$(CC) $(CFLAGS) -m64 -o $@ $^
 
-run_tests: all
+run_tests: all sub_run_tests
 	@-for PROG in $(PROGS); do \
 		./$$PROG; \
 	done;
 
-clean:
+clean: sub_clean
 	rm -f $(PROGS) loop.o
 
-.PHONY: all run_tests clean
+
+SUB_TARGETS = ebb
+
+sub_all:
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET all; \
+	done;
+
+sub_run_tests: all
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET run_tests; \
+	done;
+
+sub_clean:
+	@for TARGET in $(SUB_TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
+
+.PHONY: all run_tests clean sub_all sub_run_tests sub_clean
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
new file mode 100644
index 0000000..edbba2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -0,0 +1,32 @@
+noarg:
+	$(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+PROGS := reg_access_test event_attributes_test cycles_test	\
+	 cycles_with_freeze_test pmc56_overflow_test		\
+	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
+	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
+	 task_event_pinned_vs_ebb_test multi_ebb_procs_test	\
+	 multi_counter_test pmae_handling_test			\
+	 close_clears_pmcc_test instruction_count_test		\
+	 fork_cleanup_test ebb_on_child_test			\
+	 ebb_on_willing_child_test back_to_back_ebbs_test	\
+	 lost_exception_test no_handler_test
+
+all: $(PROGS)
+
+$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c
+
+instruction_count_test: ../loop.S
+
+lost_exception_test: ../lib.c
+
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
new file mode 100644
index 0000000..66ea765
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS	50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+static void ebb_callee(void)
+{
+	uint64_t siar, val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	/* Resets the PMC */
+	count_pmc(1, sample_period);
+
+out:
+	if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+		/* Reset but leave counters frozen */
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		/* Unfreezes */
+		reset_ebb();
+
+	/* Do some stuff to chew some cycles and pop the counter */
+	siar = mfspr(SPRN_SIAR);
+	trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+	val = mfspr(SPRN_PMC1);
+	trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+int back_to_back_ebbs(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = 5;
+
+	ebb_freeze_pmcs();
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	ebb_global_enable();
+	ebb_unfreeze_pmcs();
+
+	while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}
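
The stupidly-low sample_period of 5 above works through pmc_sample_period(), defined in ebb.h later in this patch: the PMC is started just below its overflow point, so it pops again while the handler is still running. A standalone sketch of the arithmetic (the constant and helper mirror the patch; the main() harness is illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define COUNTER_OVERFLOW 0x80000000ull

    /* The PMU raises its exception when PMC bit 31 goes high, so start
     * the counter sample_period ticks below the overflow point. */
    static uint32_t pmc_sample_period(uint32_t sample_period)
    {
            return COUNTER_OVERFLOW - sample_period;
    }

    int main(void)
    {
            /* With sample_period = 5 the PMC starts at 0x7ffffffb and
             * overflows after only five counted events. */
            printf("start value: 0x%x\n", pmc_sample_period(5));
            return 0;
    }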
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
new file mode 100644
index 0000000..0f0423d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+int close_clears_pmcc(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 1)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* The real test is here: do we take a SIGILL when writing PMU regs now
+	 * that we have closed the event? We expect that we will. */
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We should still be able to read EBB regs though */
+	mfspr(SPRN_EBBHR);
+	mfspr(SPRN_EBBRR);
+	mfspr(SPRN_BESCR);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
new file mode 100644
index 0000000..d3ed64d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int cpu_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event should have run */
+	FAIL_IF(event.result.value == 0);
+	FAIL_IF(event.result.enabled != event.result.running);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}
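
The sync_with_child()/wait_for_parent()/notify_parent() helpers used here come from the shared lib.c, which is not part of this hunk. A minimal sketch of how such a handshake can be built, assuming each step is a single blocking one-byte read or write on a pipe pair (the real helpers may differ in detail):

    #include <unistd.h>

    /* pipe(2) fills fds[0] with the read end and fds[1] with the
     * write end; the anonymous struct mirrors that layout. */
    union pipe {
            struct { int read_fd, write_fd; };
            int fds[2];
    };

    /* Child side: block until the parent signals us. */
    static int wait_for_parent(union pipe read_pipe)
    {
            char c;
            return read(read_pipe.read_fd, &c, 1) != 1;
    }

    /* Child side: tell the parent we have reached the sync point. */
    static int notify_parent(union pipe write_pipe)
    {
            char c = 0;
            return write(write_pipe.write_fd, &c, 1) != 1;
    }

    /* Parent side: wake the child, then block until it answers. */
    static int sync_with_child(union pipe read_pipe, union pipe write_pipe)
    {
            char c = 0;
            if (write(write_pipe.write_fd, &c, 1) != 1)
                    return 1;
            return read(read_pipe.read_fd, &c, 1) != 1;
    }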
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
new file mode 100644
index 0000000..8b972c2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int cpu_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect the child to succeed */
+	FAIL_IF(wait_for_child(pid));
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
new file mode 100644
index 0000000..8590fc1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+int cycles(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 10) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles, "cycles");
+}
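
The final check above calls ebb_check_count(1, sample_period, 100), whose implementation appears in ebb.c later in this patch: with N EBBs taken, the accumulated PMC1 count must fall within N * (sample_period ± fudge). A condensed statement of that bound (the helper name is illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* e.g. with 10 EBBs, period 0x40000000 and fudge 100, count must
     * lie in [10 * (0x40000000 - 100), 10 * (0x40000000 + 100)]. */
    static bool in_bounds(uint64_t count, uint64_t ebbs,
                          uint64_t period, uint64_t fudge)
    {
            return count >= ebbs * (period - fudge) &&
                   count <= ebbs * (period + fudge);
    }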
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
new file mode 100644
index 0000000..754b3f2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+static bool counters_frozen = false;
+static int ebbs_while_frozen = 0;
+
+static void ebb_callee(void)
+{
+	uint64_t mask, val;
+
+	mask = MMCR0_PMAO | MMCR0_FC;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+	if (counters_frozen) {
+		trace_log_string(ebb_state.trace, "frozen");
+		ebbs_while_frozen++;
+		mask &= ~MMCR0_FC;
+	}
+
+	count_pmc(1, sample_period);
+out:
+	reset_ebb_with_clear_mask(mask);
+}
+
+int cycles_with_freeze(void)
+{
+	struct event event;
+	uint64_t val;
+	bool fc_cleared;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	fc_cleared = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		counters_frozen = false;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+		FAIL_IF(core_busy_loop());
+
+		counters_frozen = true;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+		val = mfspr(SPRN_MMCR0);
+		if (!(val & MMCR0_FC)) {
+			printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+			fc_cleared = true;
+		}
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(fc_cleared);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
new file mode 100644
index 0000000..1b46be9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "reg.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+void ebb_hook(void)
+{
+	if (ebb_user_func)
+		ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+	u64 val;
+
+	/* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */
+	/* 3) set MMCR0[PMAE]	- docs say BESCR[PME] should do this */
+	val = mfspr(SPRN_MMCR0);
+	mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE);
+
+	/* 4) clear BESCR[PMEO] */
+	mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+	/* 5) set BESCR[PME] */
+	mtspr(SPRN_BESCRS, BESCR_PME);
+
+	/* 6) rfebb 1 - done in our caller */
+}
+
+void reset_ebb(void)
+{
+	reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
+}
+
+/* Called outside of the EBB handler to check MMCR0 is sane */
+int ebb_check_mmcr0(void)
+{
+	u64 val;
+
+	val = mfspr(SPRN_MMCR0);
+	if ((val & (MMCR0_FC | MMCR0_PMAO)) == MMCR0_FC) {
+		/* It's OK if we see FC & PMAO, but not FC by itself */
+		printf("Outside of loop, only FC set 0x%llx\n", val);
+		return 1;
+	}
+
+	return 0;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+	u64 count, upper, lower;
+
+	count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+
+	lower = ebb_state.stats.ebb_count * (sample_period - fudge);
+
+	if (count < lower) {
+		printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+			pmc, count, lower, lower - count);
+		return false;
+	}
+
+	upper = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+	if (count > upper) {
+		printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+			pmc, count, upper, count - upper);
+		return false;
+	}
+
+	printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+		pmc, count, lower, upper, count - lower, upper - count);
+
+	return true;
+}
+
+void standard_ebb_callee(void)
+{
+	int found, i;
+	u64 val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+	found = 0;
+	for (i = 1; i <= 6; i++) {
+		if (ebb_state.pmc_enable[PMC_INDEX(i)])
+			found += count_pmc(i, sample_period);
+	}
+
+	if (!found)
+		ebb_state.stats.no_overflow++;
+
+out:
+	reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+void setup_ebb_handler(void (*callee)(void))
+{
+	u64 entry;
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	entry = (u64)ebb_handler;
+#else
+	struct opd
+	{
+	    u64 entry;
+	    u64 toc;
+	} *opd;
+
+	opd = (struct opd *)ebb_handler;
+	entry = opd->entry;
+#endif
+	printf("EBB Handler is at %#llx\n", entry);
+
+	ebb_user_func = callee;
+
+	/* Ensure ebb_user_func is set before we set the handler */
+	mb();
+	mtspr(SPRN_EBBHR, entry);
+
+	/* Make sure the handler is set before we return */
+	mb();
+}
+
+void clear_ebb_stats(void)
+{
+	memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
+}
+
+void dump_summary_ebb_state(void)
+{
+	printf("ebb_state:\n"			\
+	       "  ebb_count    = %d\n"		\
+	       "  spurious     = %d\n"		\
+	       "  negative     = %d\n"		\
+	       "  no_overflow  = %d\n"		\
+	       "  pmc[1] count = 0x%llx\n"	\
+	       "  pmc[2] count = 0x%llx\n"	\
+	       "  pmc[3] count = 0x%llx\n"	\
+	       "  pmc[4] count = 0x%llx\n"	\
+	       "  pmc[5] count = 0x%llx\n"	\
+	       "  pmc[6] count = 0x%llx\n",
+		ebb_state.stats.ebb_count, ebb_state.stats.spurious,
+		ebb_state.stats.negative, ebb_state.stats.no_overflow,
+		ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1],
+		ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3],
+		ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]);
+}
+
+static char *decode_mmcr0(u32 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1 << 31))
+		strcat(buf, "FC ");
+	if (value & (1 << 26))
+		strcat(buf, "PMAE ");
+	if (value & (1 << 7))
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+static char *decode_bescr(u64 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1ull << 63))
+		strcat(buf, "GE ");
+	if (value & (1ull << 32))
+		strcat(buf, "PMAE ");
+	if (value & 1)
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+void dump_ebb_hw_state(void)
+{
+	u64 bescr;
+	u32 mmcr0;
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+	bescr = mfspr(SPRN_BESCR);
+
+	printf("HW state:\n"		\
+	       "MMCR0 0x%016x %s\n"	\
+	       "EBBHR 0x%016lx\n"	\
+	       "BESCR 0x%016llx %s\n"	\
+	       "PMC1  0x%016lx\n"	\
+	       "PMC2  0x%016lx\n"	\
+	       "PMC3  0x%016lx\n"	\
+	       "PMC4  0x%016lx\n"	\
+	       "PMC5  0x%016lx\n"	\
+	       "PMC6  0x%016lx\n"	\
+	       "SIAR  0x%016lx\n",
+	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_EBBHR), bescr,
+	       decode_bescr(bescr), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
+	       mfspr(SPRN_PMC3), mfspr(SPRN_PMC4), mfspr(SPRN_PMC5),
+	       mfspr(SPRN_PMC6), mfspr(SPRN_SIAR));
+}
+
+void dump_ebb_state(void)
+{
+	dump_summary_ebb_state();
+
+	dump_ebb_hw_state();
+
+	trace_buffer_print(ebb_state.trace);
+}
+
+int count_pmc(int pmc, uint32_t sample_period)
+{
+	uint32_t start_value;
+	u64 val;
+
+	/* 0) Read PMC */
+	start_value = pmc_sample_period(sample_period);
+
+	val = read_pmc(pmc);
+	if (val < start_value)
+		ebb_state.stats.negative++;
+	else
+		ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value;
+
+	trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val);
+
+	/* 1) Reset PMC */
+	write_pmc(pmc, start_value);
+
+	/* Report if we overflowed */
+	return val >= COUNTER_OVERFLOW;
+}
+
+int ebb_event_enable(struct event *e)
+{
+	int rc;
+
+	/* Ensure any SPR writes are ordered vs us */
+	mb();
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+	if (rc)
+		return rc;
+
+	rc = event_read(e);
+
+	/* Ditto */
+	mb();
+
+	return rc;
+}
+
+void ebb_freeze_pmcs(void)
+{
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+	mb();
+}
+
+void ebb_unfreeze_pmcs(void)
+{
+	/* Unfreeze counters */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	mb();
+}
+
+void ebb_global_enable(void)
+{
+	/* Enable EBBs globally and PMU EBBs */
+	mtspr(SPRN_BESCR, 0x8000000100000000ull);
+	mb();
+}
+
+void ebb_global_disable(void)
+{
+	/* Disable EBBs & freeze counters, events are still scheduled */
+	mtspr(SPRN_BESCRR, BESCR_PME);
+	mb();
+}
+
+void event_ebb_init(struct event *e)
+{
+	e->attr.config |= (1ull << 63);
+}
+
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+	e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
+}
+
+void event_leader_ebb_init(struct event *e)
+{
+	event_ebb_init(e);
+
+	e->attr.exclusive = 1;
+	e->attr.pinned = 1;
+}
+
+int core_busy_loop(void)
+{
+	int rc;
+
+	asm volatile (
+		"li  3,  0x3030\n"
+		"std 3,  -96(1)\n"
+		"li  4,  0x4040\n"
+		"std 4,  -104(1)\n"
+		"li  5,  0x5050\n"
+		"std 5,  -112(1)\n"
+		"li  6,  0x6060\n"
+		"std 6,  -120(1)\n"
+		"li  7,  0x7070\n"
+		"std 7,  -128(1)\n"
+		"li  8,  0x0808\n"
+		"std 8,  -136(1)\n"
+		"li  9,  0x0909\n"
+		"std 9,  -144(1)\n"
+		"li  10, 0x1010\n"
+		"std 10, -152(1)\n"
+		"li  11, 0x1111\n"
+		"std 11, -160(1)\n"
+		"li  14, 0x1414\n"
+		"std 14, -168(1)\n"
+		"li  15, 0x1515\n"
+		"std 15, -176(1)\n"
+		"li  16, 0x1616\n"
+		"std 16, -184(1)\n"
+		"li  17, 0x1717\n"
+		"std 17, -192(1)\n"
+		"li  18, 0x1818\n"
+		"std 18, -200(1)\n"
+		"li  19, 0x1919\n"
+		"std 19, -208(1)\n"
+		"li  20, 0x2020\n"
+		"std 20, -216(1)\n"
+		"li  21, 0x2121\n"
+		"std 21, -224(1)\n"
+		"li  22, 0x2222\n"
+		"std 22, -232(1)\n"
+		"li  23, 0x2323\n"
+		"std 23, -240(1)\n"
+		"li  24, 0x2424\n"
+		"std 24, -248(1)\n"
+		"li  25, 0x2525\n"
+		"std 25, -256(1)\n"
+		"li  26, 0x2626\n"
+		"std 26, -264(1)\n"
+		"li  27, 0x2727\n"
+		"std 27, -272(1)\n"
+		"li  28, 0x2828\n"
+		"std 28, -280(1)\n"
+		"li  29, 0x2929\n"
+		"std 29, -288(1)\n"
+		"li  30, 0x3030\n"
+		"li  31, 0x3131\n"
+
+		"li    3,  0\n"
+		"0: "
+		"addi  3, 3, 1\n"
+		"cmpwi 3, 100\n"
+		"blt   0b\n"
+
+		/* Return 1 (fail) unless we get through all the checks */
+		"li	0, 1\n"
+
+		/* Check none of our registers have been corrupted */
+		"cmpwi  4,  0x4040\n"
+		"bne	1f\n"
+		"cmpwi  5,  0x5050\n"
+		"bne	1f\n"
+		"cmpwi  6,  0x6060\n"
+		"bne	1f\n"
+		"cmpwi  7,  0x7070\n"
+		"bne	1f\n"
+		"cmpwi  8,  0x0808\n"
+		"bne	1f\n"
+		"cmpwi  9,  0x0909\n"
+		"bne	1f\n"
+		"cmpwi  10, 0x1010\n"
+		"bne	1f\n"
+		"cmpwi  11, 0x1111\n"
+		"bne	1f\n"
+		"cmpwi  14, 0x1414\n"
+		"bne	1f\n"
+		"cmpwi  15, 0x1515\n"
+		"bne	1f\n"
+		"cmpwi  16, 0x1616\n"
+		"bne	1f\n"
+		"cmpwi  17, 0x1717\n"
+		"bne	1f\n"
+		"cmpwi  18, 0x1818\n"
+		"bne	1f\n"
+		"cmpwi  19, 0x1919\n"
+		"bne	1f\n"
+		"cmpwi  20, 0x2020\n"
+		"bne	1f\n"
+		"cmpwi  21, 0x2121\n"
+		"bne	1f\n"
+		"cmpwi  22, 0x2222\n"
+		"bne	1f\n"
+		"cmpwi  23, 0x2323\n"
+		"bne	1f\n"
+		"cmpwi  24, 0x2424\n"
+		"bne	1f\n"
+		"cmpwi  25, 0x2525\n"
+		"bne	1f\n"
+		"cmpwi  26, 0x2626\n"
+		"bne	1f\n"
+		"cmpwi  27, 0x2727\n"
+		"bne	1f\n"
+		"cmpwi  28, 0x2828\n"
+		"bne	1f\n"
+		"cmpwi  29, 0x2929\n"
+		"bne	1f\n"
+		"cmpwi  30, 0x3030\n"
+		"bne	1f\n"
+		"cmpwi  31, 0x3131\n"
+		"bne	1f\n"
+
+		/* Load junk into all our registers before we reload them from the stack. */
+		"li  3,  0xde\n"
+		"li  4,  0xad\n"
+		"li  5,  0xbe\n"
+		"li  6,  0xef\n"
+		"li  7,  0xde\n"
+		"li  8,  0xad\n"
+		"li  9,  0xbe\n"
+		"li  10, 0xef\n"
+		"li  11, 0xde\n"
+		"li  14, 0xad\n"
+		"li  15, 0xbe\n"
+		"li  16, 0xef\n"
+		"li  17, 0xde\n"
+		"li  18, 0xad\n"
+		"li  19, 0xbe\n"
+		"li  20, 0xef\n"
+		"li  21, 0xde\n"
+		"li  22, 0xad\n"
+		"li  23, 0xbe\n"
+		"li  24, 0xef\n"
+		"li  25, 0xde\n"
+		"li  26, 0xad\n"
+		"li  27, 0xbe\n"
+		"li  28, 0xef\n"
+		"li  29, 0xdd\n"
+
+		"ld     3,  -96(1)\n"
+		"cmpwi  3,  0x3030\n"
+		"bne	1f\n"
+		"ld     4,  -104(1)\n"
+		"cmpwi  4,  0x4040\n"
+		"bne	1f\n"
+		"ld     5,  -112(1)\n"
+		"cmpwi  5,  0x5050\n"
+		"bne	1f\n"
+		"ld     6,  -120(1)\n"
+		"cmpwi  6,  0x6060\n"
+		"bne	1f\n"
+		"ld     7,  -128(1)\n"
+		"cmpwi  7,  0x7070\n"
+		"bne	1f\n"
+		"ld     8,  -136(1)\n"
+		"cmpwi  8,  0x0808\n"
+		"bne	1f\n"
+		"ld     9,  -144(1)\n"
+		"cmpwi  9,  0x0909\n"
+		"bne	1f\n"
+		"ld     10, -152(1)\n"
+		"cmpwi  10, 0x1010\n"
+		"bne	1f\n"
+		"ld     11, -160(1)\n"
+		"cmpwi  11, 0x1111\n"
+		"bne	1f\n"
+		"ld     14, -168(1)\n"
+		"cmpwi  14, 0x1414\n"
+		"bne	1f\n"
+		"ld     15, -176(1)\n"
+		"cmpwi  15, 0x1515\n"
+		"bne	1f\n"
+		"ld     16, -184(1)\n"
+		"cmpwi  16, 0x1616\n"
+		"bne	1f\n"
+		"ld     17, -192(1)\n"
+		"cmpwi  17, 0x1717\n"
+		"bne	1f\n"
+		"ld     18, -200(1)\n"
+		"cmpwi  18, 0x1818\n"
+		"bne	1f\n"
+		"ld     19, -208(1)\n"
+		"cmpwi  19, 0x1919\n"
+		"bne	1f\n"
+		"ld     20, -216(1)\n"
+		"cmpwi  20, 0x2020\n"
+		"bne	1f\n"
+		"ld     21, -224(1)\n"
+		"cmpwi  21, 0x2121\n"
+		"bne	1f\n"
+		"ld     22, -232(1)\n"
+		"cmpwi  22, 0x2222\n"
+		"bne	1f\n"
+		"ld     23, -240(1)\n"
+		"cmpwi  23, 0x2323\n"
+		"bne	1f\n"
+		"ld     24, -248(1)\n"
+		"cmpwi  24, 0x2424\n"
+		"bne	1f\n"
+		"ld     25, -256(1)\n"
+		"cmpwi  25, 0x2525\n"
+		"bne	1f\n"
+		"ld     26, -264(1)\n"
+		"cmpwi  26, 0x2626\n"
+		"bne	1f\n"
+		"ld     27, -272(1)\n"
+		"cmpwi  27, 0x2727\n"
+		"bne	1f\n"
+		"ld     28, -280(1)\n"
+		"cmpwi  28, 0x2828\n"
+		"bne	1f\n"
+		"ld     29, -288(1)\n"
+		"cmpwi  29, 0x2929\n"
+		"bne	1f\n"
+
+		/* Load 0 (success) to return */
+		"li	0, 0\n"
+
+		"1: 	mr %0, 0\n"
+
+		: "=r" (rc)
+		: /* no inputs */
+		: "3", "4", "5", "6", "7", "8", "9", "10", "11", "14",
+		  "15", "16", "17", "18", "19", "20", "21", "22", "23",
+		   "24", "25", "26", "27", "28", "29", "30", "31",
+		   "memory"
+	);
+
+	return rc;
+}
+
+int core_busy_loop_with_freeze(void)
+{
+	int rc;
+
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	rc = core_busy_loop();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+	return rc;
+}
+
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+	struct event event;
+	uint64_t val;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(event_enable(&event));
+
+	if (event_read(&event)) {
+		/*
+		 * Some tests expect to fail here, so don't report an error on
+		 * this line, and return a distinguishable error code. Tell the
+		 * parent an error happened.
+		 */
+		notify_parent_of_error(write_pipe);
+		return 2;
+	}
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	FAIL_IF(notify_parent(write_pipe));
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+
+		/* To try to hit the SIGILL case */
+		val  = mfspr(SPRN_MMCRA);
+		val |= mfspr(SPRN_MMCR2);
+		val |= mfspr(SPRN_MMCR0);
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+static jmp_buf setjmp_env;
+
+static void sigill_handler(int signal)
+{
+	printf("Took sigill\n");
+	longjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+	.sa_handler = sigill_handler,
+};
+
+int catch_sigill(void (*func)(void))
+{
+	if (sigaction(SIGILL, &sigill_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	if (setjmp(setjmp_env) == 0) {
+		func();
+		return 1;
+	}
+
+	return 0;
+}
+
+void write_pmc1(void)
+{
+	mtspr(SPRN_PMC1, 0);
+}
+
+void write_pmc(int pmc, u64 value)
+{
+	switch (pmc) {
+		case 1: mtspr(SPRN_PMC1, value); break;
+		case 2: mtspr(SPRN_PMC2, value); break;
+		case 3: mtspr(SPRN_PMC3, value); break;
+		case 4: mtspr(SPRN_PMC4, value); break;
+		case 5: mtspr(SPRN_PMC5, value); break;
+		case 6: mtspr(SPRN_PMC6, value); break;
+	}
+}
+
+u64 read_pmc(int pmc)
+{
+	switch (pmc) {
+		case 1: return mfspr(SPRN_PMC1);
+		case 2: return mfspr(SPRN_PMC2);
+		case 3: return mfspr(SPRN_PMC3);
+		case 4: return mfspr(SPRN_PMC4);
+		case 5: return mfspr(SPRN_PMC5);
+		case 6: return mfspr(SPRN_PMC6);
+	}
+
+	return 0;
+}
+
+static void term_handler(int signal)
+{
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+	abort();
+}
+
+struct sigaction term_action = {
+	.sa_handler = term_handler,
+};
+
+static void __attribute__((constructor)) ebb_init(void)
+{
+	clear_ebb_stats();
+
+	if (sigaction(SIGTERM, &term_action, NULL))
+		perror("sigaction");
+
+	ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024);
+}
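
The #ifdef dance in setup_ebb_handler() above handles the two 64-bit PowerPC ABIs: under ELFv1 a function symbol refers to a function descriptor (opd) whose first doubleword is the real entry address, while under ELFv2 the symbol is the entry address itself. A condensed sketch of the same resolution, usable with any handler symbol (names mirror the patch):

    #include <stdint.h>

    static uint64_t entry_point(void (*handler)(void))
    {
    #if defined(_CALL_ELF) && _CALL_ELF == 2
            /* ELFv2: the symbol is the entry address. */
            return (uint64_t)handler;
    #else
            /* ELFv1: dereference the descriptor to get the entry. */
            return *(uint64_t *)handler;
    #endif
    }

    /* Usage, as in setup_ebb_handler():
     *   mtspr(SPRN_EBBHR, entry_point(ebb_handler));
     */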
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
new file mode 100644
index 0000000..e62bde0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc)	((pmc)-1)
+
+#define NUM_PMC_VALUES	128
+
+struct ebb_state
+{
+	struct {
+		u64 pmc_count[6];
+		volatile int ebb_count;
+		int spurious;
+		int negative;
+		int no_overflow;
+	} stats;
+
+	bool pmc_enable[6];
+	struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+	return COUNTER_OVERFLOW - value;
+}
+
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+	ebb_state.pmc_enable[PMC_INDEX(pmc)] = true;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+void event_ebb_init(struct event *e);
+void event_leader_ebb_init(struct event *e);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int core_busy_loop_with_freeze(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
new file mode 100644
index 0000000..14274ea
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB   .long 0x4c000924
+
+/* Stack layout:
+ *
+ *                   ^
+ *  User stack       |
+ *  Back chain ------+	<- r1		<-------+
+ *  ...						|
+ *  Red zone / ABI Gap				|
+ *  ...						|
+ *  vr63	<+				|
+ *  vr0		 |				|
+ *  VSCR	 |				|
+ *  FSCR	 |				|
+ *  r31		 | Save area			|
+ *  r0		 |				|
+ *  XER		 |				|
+ *  CTR		 |				|
+ *  LR		 |				|
+ *  CCR		<+				|
+ *  ...		<+				|
+ *  LR		 | Caller frame			|
+ *  CCR		 |				|
+ *  Back chain	<+	<- updated r1	--------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP		512
+#else
+#define ABIGAP		288
+#endif
+
+#define NR_GPR		32
+#define NR_SPR		6
+#define NR_VSR		64
+
+#define SAVE_AREA	((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME	112
+
+#define STACK_FRAME	(ABIGAP + SAVE_AREA + CALLER_FRAME)
+
+#define CCR_SAVE	(CALLER_FRAME)
+#define LR_SAVE		(CCR_SAVE + 8)
+#define CTR_SAVE	(LR_SAVE  + 8)
+#define XER_SAVE	(CTR_SAVE + 8)
+#define GPR_SAVE(n)	(XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE	(GPR_SAVE(31) + 8)
+#define VSCR_SAVE	(FSCR_SAVE + 8)
+#define VSR_SAVE(n)	(VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n)	std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n)	ld  n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n)	lis n,0xaaaa
+
+#define SAVE_VSR(n, b)	li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b)	li b, VSR_SAVE(n); lxvd2x  n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr)	\
+	lis     reg,(expr)@highest;	\
+	ori     reg,reg,(expr)@higher;	\
+	rldicr  reg,reg,32,31;		\
+	oris    reg,reg,(expr)@h;	\
+	ori     reg,reg,(expr)@l;
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+	.type FUNC_NAME(name),@function; \
+	.globl FUNC_NAME(name); \
+	FUNC_NAME(name):
+
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer using our entry point */	\
+	LOAD_REG_IMMEDIATE(r12, name)				\
+0:	addis	r2,r12,(.TOC.-0b)@ha;				\
+	addi	r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer via our opd entry */	\
+	LOAD_REG_IMMEDIATE(r2, name)			\
+	ld      r2,8(r2);
+#endif
+
+    .text
+
+ENTRY_POINT(ebb_handler)
+    stdu    r1,-STACK_FRAME(r1)
+    SAVE_GPR(0)
+    mflr    r0
+    std     r0,LR_SAVE(r1)
+    mfcr    r0
+    std     r0,CCR_SAVE(r1)
+    mfctr   r0
+    std     r0,CTR_SAVE(r1)
+    mfxer   r0
+    std     r0,XER_SAVE(r1)
+    SAVE_GPR(2)
+    SAVE_GPR(3)
+    SAVE_GPR(4)
+    SAVE_GPR(5)
+    SAVE_GPR(6)
+    SAVE_GPR(7)
+    SAVE_GPR(8)
+    SAVE_GPR(9)
+    SAVE_GPR(10)
+    SAVE_GPR(11)
+    SAVE_GPR(12)
+    SAVE_GPR(13)
+    SAVE_GPR(14)
+    SAVE_GPR(15)
+    SAVE_GPR(16)
+    SAVE_GPR(17)
+    SAVE_GPR(18)
+    SAVE_GPR(19)
+    SAVE_GPR(20)
+    SAVE_GPR(21)
+    SAVE_GPR(22)
+    SAVE_GPR(23)
+    SAVE_GPR(24)
+    SAVE_GPR(25)
+    SAVE_GPR(26)
+    SAVE_GPR(27)
+    SAVE_GPR(28)
+    SAVE_GPR(29)
+    SAVE_GPR(30)
+    SAVE_GPR(31)
+    SAVE_VSR(0, r3)
+    mffs     f0
+    stfd     f0, FSCR_SAVE(r1)
+    mfvscr   f0
+    stfd     f0, VSCR_SAVE(r1)
+    SAVE_VSR(1,  r3)
+    SAVE_VSR(2,  r3)
+    SAVE_VSR(3,  r3)
+    SAVE_VSR(4,  r3)
+    SAVE_VSR(5,  r3)
+    SAVE_VSR(6,  r3)
+    SAVE_VSR(7,  r3)
+    SAVE_VSR(8,  r3)
+    SAVE_VSR(9,  r3)
+    SAVE_VSR(10, r3)
+    SAVE_VSR(11, r3)
+    SAVE_VSR(12, r3)
+    SAVE_VSR(13, r3)
+    SAVE_VSR(14, r3)
+    SAVE_VSR(15, r3)
+    SAVE_VSR(16, r3)
+    SAVE_VSR(17, r3)
+    SAVE_VSR(18, r3)
+    SAVE_VSR(19, r3)
+    SAVE_VSR(20, r3)
+    SAVE_VSR(21, r3)
+    SAVE_VSR(22, r3)
+    SAVE_VSR(23, r3)
+    SAVE_VSR(24, r3)
+    SAVE_VSR(25, r3)
+    SAVE_VSR(26, r3)
+    SAVE_VSR(27, r3)
+    SAVE_VSR(28, r3)
+    SAVE_VSR(29, r3)
+    SAVE_VSR(30, r3)
+    SAVE_VSR(31, r3)
+    SAVE_VSR(32, r3)
+    SAVE_VSR(33, r3)
+    SAVE_VSR(34, r3)
+    SAVE_VSR(35, r3)
+    SAVE_VSR(36, r3)
+    SAVE_VSR(37, r3)
+    SAVE_VSR(38, r3)
+    SAVE_VSR(39, r3)
+    SAVE_VSR(40, r3)
+    SAVE_VSR(41, r3)
+    SAVE_VSR(42, r3)
+    SAVE_VSR(43, r3)
+    SAVE_VSR(44, r3)
+    SAVE_VSR(45, r3)
+    SAVE_VSR(46, r3)
+    SAVE_VSR(47, r3)
+    SAVE_VSR(48, r3)
+    SAVE_VSR(49, r3)
+    SAVE_VSR(50, r3)
+    SAVE_VSR(51, r3)
+    SAVE_VSR(52, r3)
+    SAVE_VSR(53, r3)
+    SAVE_VSR(54, r3)
+    SAVE_VSR(55, r3)
+    SAVE_VSR(56, r3)
+    SAVE_VSR(57, r3)
+    SAVE_VSR(58, r3)
+    SAVE_VSR(59, r3)
+    SAVE_VSR(60, r3)
+    SAVE_VSR(61, r3)
+    SAVE_VSR(62, r3)
+    SAVE_VSR(63, r3)
+
+    TRASH_GPR(2)
+    TRASH_GPR(3)
+    TRASH_GPR(4)
+    TRASH_GPR(5)
+    TRASH_GPR(6)
+    TRASH_GPR(7)
+    TRASH_GPR(8)
+    TRASH_GPR(9)
+    TRASH_GPR(10)
+    TRASH_GPR(11)
+    TRASH_GPR(12)
+    TRASH_GPR(14)
+    TRASH_GPR(15)
+    TRASH_GPR(16)
+    TRASH_GPR(17)
+    TRASH_GPR(18)
+    TRASH_GPR(19)
+    TRASH_GPR(20)
+    TRASH_GPR(21)
+    TRASH_GPR(22)
+    TRASH_GPR(23)
+    TRASH_GPR(24)
+    TRASH_GPR(25)
+    TRASH_GPR(26)
+    TRASH_GPR(27)
+    TRASH_GPR(28)
+    TRASH_GPR(29)
+    TRASH_GPR(30)
+    TRASH_GPR(31)
+
+    RESTORE_TOC(ebb_handler)
+
+    /*
+     * r13 is our TLS pointer. We leave whatever value was in there when the
+     * EBB fired. That seems to be OK because once set the TLS pointer is not
+     * changed - but presumably that could change in the future.
+     */
+
+    bl      ebb_hook
+    nop
+
+    /* r2 may be changed here but we don't care */
+
+    lfd      f0, FSCR_SAVE(r1)
+    mtfsf    0xff,f0
+    lfd      f0, VSCR_SAVE(r1)
+    mtvscr   f0
+    LOAD_VSR(0, r3)
+    LOAD_VSR(1,  r3)
+    LOAD_VSR(2,  r3)
+    LOAD_VSR(3,  r3)
+    LOAD_VSR(4,  r3)
+    LOAD_VSR(5,  r3)
+    LOAD_VSR(6,  r3)
+    LOAD_VSR(7,  r3)
+    LOAD_VSR(8,  r3)
+    LOAD_VSR(9,  r3)
+    LOAD_VSR(10, r3)
+    LOAD_VSR(11, r3)
+    LOAD_VSR(12, r3)
+    LOAD_VSR(13, r3)
+    LOAD_VSR(14, r3)
+    LOAD_VSR(15, r3)
+    LOAD_VSR(16, r3)
+    LOAD_VSR(17, r3)
+    LOAD_VSR(18, r3)
+    LOAD_VSR(19, r3)
+    LOAD_VSR(20, r3)
+    LOAD_VSR(21, r3)
+    LOAD_VSR(22, r3)
+    LOAD_VSR(23, r3)
+    LOAD_VSR(24, r3)
+    LOAD_VSR(25, r3)
+    LOAD_VSR(26, r3)
+    LOAD_VSR(27, r3)
+    LOAD_VSR(28, r3)
+    LOAD_VSR(29, r3)
+    LOAD_VSR(30, r3)
+    LOAD_VSR(31, r3)
+    LOAD_VSR(32, r3)
+    LOAD_VSR(33, r3)
+    LOAD_VSR(34, r3)
+    LOAD_VSR(35, r3)
+    LOAD_VSR(36, r3)
+    LOAD_VSR(37, r3)
+    LOAD_VSR(38, r3)
+    LOAD_VSR(39, r3)
+    LOAD_VSR(40, r3)
+    LOAD_VSR(41, r3)
+    LOAD_VSR(42, r3)
+    LOAD_VSR(43, r3)
+    LOAD_VSR(44, r3)
+    LOAD_VSR(45, r3)
+    LOAD_VSR(46, r3)
+    LOAD_VSR(47, r3)
+    LOAD_VSR(48, r3)
+    LOAD_VSR(49, r3)
+    LOAD_VSR(50, r3)
+    LOAD_VSR(51, r3)
+    LOAD_VSR(52, r3)
+    LOAD_VSR(53, r3)
+    LOAD_VSR(54, r3)
+    LOAD_VSR(55, r3)
+    LOAD_VSR(56, r3)
+    LOAD_VSR(57, r3)
+    LOAD_VSR(58, r3)
+    LOAD_VSR(59, r3)
+    LOAD_VSR(60, r3)
+    LOAD_VSR(61, r3)
+    LOAD_VSR(62, r3)
+    LOAD_VSR(63, r3)
+
+    ld      r0,XER_SAVE(r1)
+    mtxer   r0
+    ld      r0,CTR_SAVE(r1)
+    mtctr   r0
+    ld      r0,LR_SAVE(r1)
+    mtlr    r0
+    ld      r0,CCR_SAVE(r1)
+    mtcr    r0
+    REST_GPR(0)
+    REST_GPR(2)
+    REST_GPR(3)
+    REST_GPR(4)
+    REST_GPR(5)
+    REST_GPR(6)
+    REST_GPR(7)
+    REST_GPR(8)
+    REST_GPR(9)
+    REST_GPR(10)
+    REST_GPR(11)
+    REST_GPR(12)
+    REST_GPR(13)
+    REST_GPR(14)
+    REST_GPR(15)
+    REST_GPR(16)
+    REST_GPR(17)
+    REST_GPR(18)
+    REST_GPR(19)
+    REST_GPR(20)
+    REST_GPR(21)
+    REST_GPR(22)
+    REST_GPR(23)
+    REST_GPR(24)
+    REST_GPR(25)
+    REST_GPR(26)
+    REST_GPR(27)
+    REST_GPR(28)
+    REST_GPR(29)
+    REST_GPR(30)
+    REST_GPR(31)
+    addi    r1,r1,STACK_FRAME
+    RFEBB
+FUNC_END(ebb_handler)
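
For reference, the frame constants above work out as: SAVE_AREA = (32 GPRs + 6 SPR slots) * 8 + 64 VSRs * 16 = 304 + 1024 = 1328 bytes, so STACK_FRAME is 512 + 1328 + 112 = 1952 bytes on ELFv2 and 288 + 1328 + 112 = 1728 on ELFv1. A compile-time check of that arithmetic (a sketch; the _Static_assert wrapper is not in the patch):

    /* Mirrors the constants in ebb_handler.S. */
    #define NR_GPR          32
    #define NR_SPR          6       /* CCR, LR, CTR, XER, FSCR, VSCR */
    #define NR_VSR          64
    #define CALLER_FRAME    112

    #define SAVE_AREA       ((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))

    _Static_assert(SAVE_AREA == 1328, "save area size");
    _Static_assert(512 + SAVE_AREA + CALLER_FRAME == 1952, "ELFv2 frame");
    _Static_assert(288 + SAVE_AREA + CALLER_FRAME == 1728, "ELFv1 frame");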
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
new file mode 100644
index 0000000..c45f948
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests that we can set up an EBB on our child. Nothing interesting happens,
+ * because even though the event is enabled and running, the child hasn't
+ * enabled the actual delivery of the EBBs.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	int i;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Parent creates EBB event */
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Check the EBB is enabled by writing PMC1 */
+	write_pmc1();
+
+	/* EBB event is enabled here */
+	for (i = 0; i < 1000000; i++) ;
+
+	return 0;
+}
+
+int ebb_on_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child should just exit happily */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_on_child, "ebb_on_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
new file mode 100644
index 0000000..11acf1d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests that we can set up an EBB on our child. The child expects this and
+ * enables EBBs, which are then delivered to the child, even though the event
+ * is created by the parent.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	/* Set up our EBB handler before the EBB event is created */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/* Tests that we can set up an EBB on our child - if it's expecting it */
+int ebb_on_willing_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to setup its EBB handler */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Child should now take EBBs and then exit */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+}
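
event_open_with_pid() used above attaches the parent's event to the child. It is defined in the shared event.c (not in this hunk); the underlying mechanism is the perf_event_open syscall, which takes a target pid directly. A sketch, assuming the wrapper is a thin veneer over the raw syscall (perf_event_open has no glibc wrapper):

    #define _GNU_SOURCE
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                   int cpu, int group_fd, unsigned long flags)
    {
            return syscall(__NR_perf_event_open, attr, pid, cpu,
                           group_fd, flags);
    }

    /* Attach to 'pid' on any cpu, no group:
     *   fd = sys_perf_event_open(&event.attr, pid, -1, -1, 0);
     */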
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 0000000..be4dd5a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int ebb_vs_cpu_event(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Now try to install our CPU event */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* .. and wait for it to complete */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run, but we don't expect 100% */
+	FAIL_IF(event.result.enabled >= event.result.running);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+}
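
The enabled/running comparison above relies on perf's read format: when an event is opened with PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, read() returns the count plus both times, and time_running < time_enabled means the event spent part of its life descheduled - here, bumped off the PMU by the EBB. A sketch of reading that format (assuming event_read() fills result.enabled/result.running from such a read):

    #include <stdint.h>
    #include <unistd.h>

    /* Layout returned by read(2) when both TOTAL_TIME flags are set
     * in attr.read_format at open time. */
    struct read_format {
            uint64_t value;         /* the counter value */
            uint64_t time_enabled;  /* time the event was enabled */
            uint64_t time_running;  /* time it was actually on the PMU */
    };

    static int read_event(int fd, struct read_format *rf)
    {
            return read(fd, rf, sizeof(*rf)) != sizeof(*rf);
    }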
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
new file mode 100644
index 0000000..7e78153
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test various attributes of the EBB event are enforced.
+ */
+int event_attributes(void)
+{
+	struct event event, leader;
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to succeed */
+	FAIL_IF(event_open(&event));
+	event_close(&event);
+
+
+	event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+	event_leader_ebb_init(&event);
+	/* Expected to fail, no PMC specified */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+	event.attr.exclusive = 0;
+	/* Expected to fail, not exclusive */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x3001e);
+	event_leader_ebb_init(&event);
+	event.attr.freq = 1;
+	/* Expected to fail, sets freq */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x4001e);
+	event_leader_ebb_init(&event);
+	event.attr.sample_period = 1;
+	/* Expected to fail, sets sample_period */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.enable_on_exec = 1;
+	/* Expected to fail, sets enable_on_exec */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.inherit = 1;
+	/* Expected to fail, sets inherit */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to succeed */
+	FAIL_IF(event_open_with_group(&event, leader.fd));
+	event_close(&leader);
+	event_close(&event);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+
+	/* Expected to fail, event doesn't request EBB, leader does */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	/* Clear the EBB flag */
+	leader.attr.config &= ~(1ull << 63);
+
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to fail, leader doesn't request EBB */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.exclusive = 0;
+	/* Expected to fail, leader isn't exclusive */
+	FAIL_IF(event_open(&leader) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.pinned = 0;
+	/* Expected to fail, leader isn't pinned */
+	FAIL_IF(event_open(&leader) == 0);
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to fail, not a task event */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(event_attributes, "event_attributes");
+}
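
The raw codes used throughout (0x1001e, 0x2001e, ...) follow a POWER8 raw event encoding in which the PMC number occupies bits 16-19 of attr.config, which is why the plain 0x001e is rejected above as having no PMC specified. A small helper illustrating that composition (an assumption inferred from the codes in this patch, not spelled out here):

    #include <stdint.h>

    /* Compose a raw event code: PMC number in bits 16-19,
     * event selector in the low 16 bits. */
    static uint64_t raw_event(unsigned int pmc, uint16_t psel)
    {
            return ((uint64_t)pmc << 16) | psel;
    }

    /* raw_event(1, 0x1e) == 0x1001e, the "cycles" event on PMC1. */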
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
new file mode 100644
index 0000000..b866a05
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(thirty_two_instruction_loop)
+	cmpwi	r3,0
+	beqlr
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
new file mode 100644
index 0000000..9e7af6e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child, e.g. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+static struct event event;
+
+static int child(void)
+{
+	/* Even though we have EBE=0 we can still see the EBB regs */
+	FAIL_IF(mfspr(SPRN_BESCR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We can still read from the event, though it is on our parent */
+	FAIL_IF(event_read(&event));
+
+	return 0;
+}
+
+/* Tests that fork clears EBB state */
+int fork_cleanup(void)
+{
+	pid_t pid;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Don't need to actually take any EBBs */
+
+	pid = fork();
+	if (pid == 0)
+		exit(child());
+
+	/* Child does the actual testing */
+	FAIL_IF(wait_for_child(pid));
+
+	/* After fork */
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(fork_cleanup, "fork_cleanup");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
new file mode 100644
index 0000000..f8190fa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+static bool counters_frozen = true;
+
+static int do_count_loop(struct event *event, uint64_t instructions,
+			 uint64_t overhead, bool report)
+{
+	int64_t difference, expected;
+	double percentage;
+
+	clear_ebb_stats();
+
+	counters_frozen = false;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+	thirty_two_instruction_loop(instructions >> 5);
+
+	counters_frozen = true;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+	count_pmc(4, sample_period);
+
+	event->result.value = ebb_state.stats.pmc_count[4-1];
+	expected = instructions + overhead;
+	difference = event->result.value - expected;
+	percentage = (double)difference / event->result.value * 100;
+
+	if (report) {
+		printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+		printf("Expected %lu\n", expected);
+		printf("Actual   %llu\n", event->result.value);
+		printf("Error    %ld, %f%%\n", difference, percentage);
+		printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+	}
+
+	if (difference < 0)
+		difference = -difference;
+
+	/* Tolerate a difference of up to 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / event->result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static uint64_t determine_overhead(struct event *event)
+{
+	uint64_t current, overhead;
+	int i;
+
+	do_count_loop(event, 0, 0, false);
+	overhead = event->result.value;
+
+	for (i = 0; i < 100; i++) {
+		do_count_loop(event, 0, 0, false);
+		current = event->result.value;
+		if (current < overhead) {
+			printf("Replacing overhead %lu with %lu\n", overhead, current);
+			overhead = current;
+		}
+	}
+
+	return overhead;
+}
+
+static void pmc4_ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(4, sample_period);
+out:
+	if (counters_frozen)
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		reset_ebb();
+}
+
+int instruction_count(void)
+{
+	struct event event;
+	uint64_t overhead;
+
+	event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+	event_leader_ebb_init(&event);
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = COUNTER_OVERFLOW;
+
+	setup_ebb_handler(pmc4_ebb_callee);
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	ebb_global_enable();
+
+	overhead = determine_overhead(&event);
+	printf("Overhead of null loop: %lu instructions\n", overhead);
+
+	/* Run for 1M instructions */
+	FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+	/* Run for 10M instructions */
+	FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+	/* Run for 100M instructions */
+	FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+	/* Run for 1G instructions */
+	FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+	/* Run for 16G instructions */
+	FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+	/* Run for 64G instructions */
+	FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+	/* Run for 128G instructions */
+	FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+	ebb_global_disable();
+	event_close(&event);
+
+	printf("Finished OK\n");
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(instruction_count, "instruction_count");
+}
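
The tolerance check in do_count_loop() above is integer-only: scaling |difference| by 10000 * 100 = 1,000,000 before dividing by the measured value makes the quotient non-zero exactly when |difference| / value >= 1e-6, i.e. when the error reaches 0.0001 %. The same test pulled out as a standalone helper (a sketch; the multiply can overflow for differences above ~9e12, just as in the original):

    #include <stdbool.h>
    #include <stdint.h>

    /* True when |difference| is below 0.0001 % of value. */
    static bool within_tolerance(int64_t difference, int64_t value)
    {
            if (difference < 0)
                    difference = -difference;

            /* |d| * 1e6 / v == 0  <=>  |d| / v < 1e-6 */
            return (difference * 10000 * 100) / value == 0;
    }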
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
new file mode 100644
index 0000000..0c9dd9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that tries to trigger CPU_FTR_PMAO_BUG, a hardware defect
+ * where an exception triggers but we context switch before it is delivered and
+ * lose the exception.
+ */
+
+static int test_body(void)
+{
+	int i, orig_period, max_period;
+	struct event event;
+
+	/* We use PMC4 to make sure the kernel switches all counters correctly */
+	event_init_named(&event, 0x40002, "instructions");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(4);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	/*
+	 * We want a low sample period, but we also want to get out of the EBB
+	 * handler without tripping up again.
+	 *
+	 * This value picked after much experimentation.
+	 */
+	orig_period = max_period = sample_period = 400;
+
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 1000000) {
+		/*
+		 * We are trying to get the EBB exception to race exactly with
+		 * us entering the kernel to do the syscall. We then need the
+		 * kernel to decide our timeslice is up and context switch to
+		 * the other thread. When we come back our EBB will have been
+		 * lost and we'll spin in this while loop forever.
+		 */
+
+		for (i = 0; i < 100000; i++)
+			sched_yield();
+
+		/* Change the sample period slightly to try and hit the race */
+		if (sample_period >= (orig_period + 200))
+			sample_period = orig_period;
+		else
+			sample_period++;
+
+		if (sample_period > max_period)
+			max_period = sample_period;
+	}
+
+	ebb_freeze_pmcs();
+	ebb_global_disable();
+
+	count_pmc(4, sample_period);
+	mtspr(SPRN_PMC4, 0xdead);
+
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* We vary our sample period so we need extra fudge here */
+	FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period)));
+
+	return 0;
+}
+
+static int lost_exception(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(lost_exception, "lost_exception");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
new file mode 100644
index 0000000..67d78af
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test counting multiple events using EBBs.
+ */
+int multi_counter(void)
+{
+	struct event events[6];
+	int i, group_fd;
+
+	event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+	event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+	event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+	event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+	event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+	event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+	event_leader_ebb_init(&events[0]);
+	for (i = 1; i < 6; i++)
+		event_ebb_init(&events[i]);
+
+	group_fd = -1;
+	for (i = 0; i < 6; i++) {
+		events[i].attr.exclude_kernel = 1;
+		events[i].attr.exclude_hv = 1;
+		events[i].attr.exclude_idle = 1;
+
+		FAIL_IF(event_open_with_group(&events[i], group_fd));
+		if (group_fd == -1)
+			group_fd = events[0].fd;
+	}
+
+	ebb_enable_pmc_counting(1);
+	ebb_enable_pmc_counting(2);
+	ebb_enable_pmc_counting(3);
+	ebb_enable_pmc_counting(4);
+	ebb_enable_pmc_counting(5);
+	ebb_enable_pmc_counting(6);
+	setup_ebb_handler(standard_ebb_callee);
+
+	FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+	FAIL_IF(event_read(&events[0]));
+
+	ebb_global_enable();
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 50) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+	count_pmc(2, sample_period);
+	count_pmc(3, sample_period);
+	count_pmc(4, sample_period);
+	count_pmc(5, sample_period);
+	count_pmc(6, sample_period);
+
+	dump_ebb_state();
+
+	for (i = 0; i < 6; i++)
+		event_close(&events[i]);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(multi_counter, "multi_counter");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 0000000..b8dc371
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB-using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
+ */
+
+/* Set by the SIGINT handler, so marked volatile */
+static volatile bool child_should_exit;
+
+static void sigint_handler(int signal)
+{
+	child_should_exit = true;
+}
+
+struct sigaction sigint_action = {
+	.sa_handler = sigint_handler,
+};
+
+static int cycles_child(void)
+{
+	struct event event;
+
+	if (sigaction(SIGINT, &sigint_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (!child_should_exit) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_summary_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+#define NR_CHILDREN	4
+
+int multi_ebb_procs(void)
+{
+	pid_t pids[NR_CHILDREN];
+	int cpu, rc, i;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	for (i = 0; i < NR_CHILDREN; i++) {
+		pids[i] = fork();
+		if (pids[i] == 0)
+			exit(cycles_child());
+	}
+
+	/* Have them all run for "a while" */
+	sleep(10);
+
+	rc = 0;
+	for (i = 0; i < NR_CHILDREN; i++) {
+		/* Tell them to stop */
+		kill(pids[i], SIGINT);
+		/* And wait */
+		rc |= wait_for_child(pids[i]);
+	}
+
+	return rc;
+}
+
+int main(void)
+{
+	return test_harness(multi_ebb_procs, "multi_ebb_procs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
new file mode 100644
index 0000000..2f9bf8e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/* Test that things work sanely if we have no handler */
+
+static int no_handler_test(void)
+{
+	struct event event;
+	u64 val;
+	int i;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	val = mfspr(SPRN_EBBHR);
+	FAIL_IF(val != 0);
+
+	/* Make sure it overflows quickly */
+	sample_period = 1000;
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Spin to make sure the event has time to overflow */
+	for (i = 0; i < 1000; i++)
+		mb();
+
+	dump_ebb_state();
+
+	/* We expect to see the PMU frozen & PMAO set */
+	val = mfspr(SPRN_MMCR0);
+	FAIL_IF(val != (MMCR0_FC | MMCR0_PMAO));
+
+	event_close(&event);
+
+	dump_ebb_state();
+
+	/* The real test is that we never took an EBB at 0x0 */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(no_handler_test, "no_handler_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
new file mode 100644
index 0000000..986500f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that the kernel properly handles PMAE across context switches.
+ *
+ * We test this by calling into the kernel inside our EBB handler, where PMAE
+ * is clear. A cpu eater companion thread is running on the same CPU as us to
+ * encourage the scheduler to switch us.
+ *
+ * The kernel must make sure that when it context switches us back in, it
+ * honours the fact that we had PMAE clear.
+ *
+ * Observed to hit the failing case on the first EBB with a broken kernel.
+ */
+
+static bool mmcr0_mismatch;
+static uint64_t before, after;
+
+static void syscall_ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(1, sample_period);
+
+	before = mfspr(SPRN_MMCR0);
+
+	/* Try and get ourselves scheduled, to force a PMU context switch */
+	sched_yield();
+
+	after = mfspr(SPRN_MMCR0);
+	if (before != after)
+		mmcr0_mismatch = true;
+
+out:
+	reset_ebb();
+}
+
+static int test_body(void)
+{
+	struct event event;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(syscall_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	if (mmcr0_mismatch)
+		printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(mmcr0_mismatch);
+
+	return 0;
+}
+
+int pmae_handling(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(pmae_handling, "pmae_handling");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
new file mode 100644
index 0000000..a503fa7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being
+ * used. Tests the MMCR0_FC56 logic in the kernel.
+ */
+
+static int pmc56_overflowed;
+
+static void ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(2, sample_period);
+
+	val = mfspr(SPRN_PMC5);
+	if (val >= COUNTER_OVERFLOW)
+		pmc56_overflowed++;
+
+	count_pmc(5, COUNTER_OVERFLOW);
+
+	val = mfspr(SPRN_PMC6);
+	if (val >= COUNTER_OVERFLOW)
+		pmc56_overflowed++;
+
+	count_pmc(6, COUNTER_OVERFLOW);
+
+out:
+	reset_ebb();
+}
+
+int pmc56_overflow(void)
+{
+	struct event event;
+
+	/* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, 0);
+	mtspr(SPRN_PMC6, 0);
+
+	while (ebb_state.stats.ebb_count < 10)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(2, sample_period);
+
+	dump_ebb_state();
+
+	printf("PMC5/6 overflow %d\n", pmc56_overflowed);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(pmc56_overflow, "pmc56_overflow");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/pmu/ebb/reg.h
new file mode 100644
index 0000000..5921b0d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x)        #x
+#define __stringify(x)          __stringify_1(x)
+
+#define mfspr(rn)       ({unsigned long rval; \
+                         asm volatile("mfspr %0," __stringify(rn) \
+                                 : "=r" (rval)); rval; })
+#define mtspr(rn, v)    asm volatile("mtspr " __stringify(rn) ",%0" : \
+                                    : "r" ((unsigned long)(v)) \
+                                    : "memory")
+
+#define mb()		asm volatile("sync" : : : "memory");
+
+#define SPRN_MMCR2     769
+#define SPRN_MMCRA     770
+#define SPRN_MMCR0     779
+#define   MMCR0_PMAO   0x00000080
+#define   MMCR0_PMAE   0x04000000
+#define   MMCR0_FC     0x80000000
+#define SPRN_EBBHR     804
+#define SPRN_EBBRR     805
+#define SPRN_BESCR     806     /* Branch event status & control register */
+#define SPRN_BESCRS    800     /* Branch event status & control set (bits written as 1 are set) */
+#define SPRN_BESCRSU   801     /* Branch event status & control set upper */
+#define SPRN_BESCRR    802     /* Branch event status & control reset (bits written as 1 are cleared) */
+#define SPRN_BESCRRU   803     /* Branch event status & control reset upper */
+
+#define BESCR_PMEO     0x1     /* PMU Event-based exception Occurred */
+#define BESCR_PME      (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1      771
+#define SPRN_PMC2      772
+#define SPRN_PMC3      773
+#define SPRN_PMC4      774
+#define SPRN_PMC5      775
+#define SPRN_PMC6      776
+
+#define SPRN_SIAR      780
+#define SPRN_SDAR      781
+#define SPRN_SIER      768
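+
+/*
+ * A minimal usage sketch (illustrative only): each macro expands to a
+ * single mfspr/mtspr instruction, so the SPRs above can be accessed
+ * directly from userspace:
+ *
+ *	mtspr(SPRN_PMC1, 0);
+ *	unsigned long val = mfspr(SPRN_PMC1);
+ */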
+
+#endif /* _SELFTESTS_POWERPC_REG_H */
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
new file mode 100644
index 0000000..0cae66f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+#include "reg.h"
+
+
+/*
+ * Test basic access to the EBB regs; they should be user accessible with no
+ * kernel interaction required.
+ */
+int reg_access(void)
+{
+	uint64_t val, expected;
+
+	expected = 0x8000000100000000ull;
+	mtspr(SPRN_BESCR, expected);
+	val = mfspr(SPRN_BESCR);
+
+	FAIL_IF(val != expected);
+
+	expected = 0x0000000001000000ull;
+	mtspr(SPRN_EBBHR, expected);
+	val = mfspr(SPRN_EBBHR);
+
+	FAIL_IF(val != expected);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(reg_access, "reg_access");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
new file mode 100644
index 0000000..d56607e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task
+ * event should prevent the EBB event from being enabled.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int task_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+	int rc;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We set up the task event first */
+	rc = setup_child_event(&event, pid);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	FAIL_IF(event.result.value == 0);
+	/*
+	 * For reasons I don't understand enabled is usually just slightly
+	 * lower than running. Would be good to confirm why.
+	 */
+	FAIL_IF(event.result.enabled == 0);
+	FAIL_IF(event.result.running == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
new file mode 100644
index 0000000..eba3219
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a per-task event vs an EBB - in that order. The EBB should push the
+ * per-task event off the PMU.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int task_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+	int rc;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We set up the task event first */
+	rc = setup_child_event(&event, pid);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* The EBB event should push the task event off the PMU, so the child should succeed */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The task event may or may not have run, so we can't assert anything about it */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(task_event_vs_ebb, "task_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
new file mode 100644
index 0000000..251e66a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "trace.h"
+
+
+struct trace_buffer *trace_buffer_allocate(u64 size)
+{
+	struct trace_buffer *tb;
+
+	if (size < sizeof(*tb)) {
+		fprintf(stderr, "Error: trace buffer too small\n");
+		return NULL;
+	}
+
+	tb = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (tb == MAP_FAILED) {
+		perror("mmap");
+		return NULL;
+	}
+
+	tb->size = size;
+	tb->tail = tb->data;
+	tb->overflow = false;
+
+	return tb;
+}
+
+static bool trace_check_bounds(struct trace_buffer *tb, void *p)
+{
+	return p < ((void *)tb + tb->size);
+}
+
+static bool trace_check_alloc(struct trace_buffer *tb, void *p)
+{
+	/*
+	 * If we ever overflowed don't allow any more input. This prevents us
+	 * from dropping a large item and then later logging a small one. The
+	 * buffer should just stop when overflow happened, not be patchy. If
+	 * you're overflowing, make your buffer bigger.
+	 */
+	if (tb->overflow)
+		return false;
+
+	if (!trace_check_bounds(tb, p)) {
+		tb->overflow = true;
+		return false;
+	}
+
+	return true;
+}
+
+static void *trace_alloc(struct trace_buffer *tb, int bytes)
+{
+	void *p, *newtail;
+
+	p = tb->tail;
+	newtail = tb->tail + bytes;
+	if (!trace_check_alloc(tb, newtail))
+		return NULL;
+
+	tb->tail = newtail;
+
+	return p;
+}
+
+static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc(tb, sizeof(*e) + payload_size);
+	if (e)
+		e->length = payload_size;
+
+	return e;
+}
+
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value)
+{
+	struct trace_entry *e;
+	u64 *p;
+
+	e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value));
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_REG;
+	p = (u64 *)e->data;
+	*p++ = reg;
+	*p++ = value;
+
+	return 0;
+}
+
+int trace_log_counter(struct trace_buffer *tb, u64 value)
+{
+	struct trace_entry *e;
+	u64 *p;
+
+	e = trace_alloc_entry(tb, sizeof(value));
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_COUNTER;
+	p = (u64 *)e->data;
+	*p++ = value;
+
+	return 0;
+}
+
+int trace_log_string(struct trace_buffer *tb, char *str)
+{
+	struct trace_entry *e;
+	char *p;
+	int len;
+
+	len = strlen(str);
+
+	/* We NULL terminate to make printing easier */
+	e = trace_alloc_entry(tb, len + 1);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_STRING;
+	p = (char *)e->data;
+	memcpy(p, str, len);
+	p += len;
+	*p = '\0';
+
+	return 0;
+}
+
+int trace_log_indent(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc_entry(tb, 0);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_INDENT;
+
+	return 0;
+}
+
+int trace_log_outdent(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc_entry(tb, 0);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_OUTDENT;
+
+	return 0;
+}
+
+static void trace_print_header(int seq, int prefix)
+{
+	printf("%*s[%d]: ", prefix, "", seq);
+}
+
+static char *trace_decode_reg(int reg)
+{
+	switch (reg) {
+	case 769: return "SPRN_MMCR2";
+	case 770: return "SPRN_MMCRA";
+	case 779: return "SPRN_MMCR0";
+	case 804: return "SPRN_EBBHR";
+	case 805: return "SPRN_EBBRR";
+	case 806: return "SPRN_BESCR";
+	case 800: return "SPRN_BESCRS";
+	case 801: return "SPRN_BESCRSU";
+	case 802: return "SPRN_BESCRR";
+	case 803: return "SPRN_BESCRRU";
+	case 771: return "SPRN_PMC1";
+	case 772: return "SPRN_PMC2";
+	case 773: return "SPRN_PMC3";
+	case 774: return "SPRN_PMC4";
+	case 775: return "SPRN_PMC5";
+	case 776: return "SPRN_PMC6";
+	case 780: return "SPRN_SIAR";
+	case 781: return "SPRN_SDAR";
+	case 768: return "SPRN_SIER";
+	}
+
+	return NULL;
+}
+
+static void trace_print_reg(struct trace_entry *e)
+{
+	u64 *p, *reg, *value;
+	char *name;
+
+	p = (u64 *)e->data;
+	reg = p++;
+	value = p;
+
+	name = trace_decode_reg(*reg);
+	if (name)
+		printf("register %-10s = 0x%016llx\n", name, *value);
+	else
+		printf("register %lld = 0x%016llx\n", *reg, *value);
+}
+
+static void trace_print_counter(struct trace_entry *e)
+{
+	u64 *value;
+
+	value = (u64 *)e->data;
+	printf("counter = %lld\n", *value);
+}
+
+static void trace_print_string(struct trace_entry *e)
+{
+	char *str;
+
+	str = (char *)e->data;
+	puts(str);
+}
+
+#define BASE_PREFIX	2
+#define PREFIX_DELTA	8
+
+static void trace_print_entry(struct trace_entry *e, int seq, int *prefix)
+{
+	switch (e->type) {
+	case TRACE_TYPE_REG:
+		trace_print_header(seq, *prefix);
+		trace_print_reg(e);
+		break;
+	case TRACE_TYPE_COUNTER:
+		trace_print_header(seq, *prefix);
+		trace_print_counter(e);
+		break;
+	case TRACE_TYPE_STRING:
+		trace_print_header(seq, *prefix);
+		trace_print_string(e);
+		break;
+	case TRACE_TYPE_INDENT:
+		trace_print_header(seq, *prefix);
+		puts("{");
+		*prefix += PREFIX_DELTA;
+		break;
+	case TRACE_TYPE_OUTDENT:
+		*prefix -= PREFIX_DELTA;
+		if (*prefix < BASE_PREFIX)
+			*prefix = BASE_PREFIX;
+		trace_print_header(seq, *prefix);
+		puts("}");
+		break;
+	default:
+		trace_print_header(seq, *prefix);
+		printf("entry @ %p type %d\n", e, e->type);
+		break;
+	}
+}
+
+void trace_buffer_print(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+	int i, prefix;
+	void *p;
+
+	printf("Trace buffer dump:\n");
+	printf("  address  %p \n", tb);
+	printf("  tail     %p\n", tb->tail);
+	printf("  size     %llu\n", tb->size);
+	printf("  overflow %s\n", tb->overflow ? "TRUE" : "false");
+	printf("  Content:\n");
+
+	p = tb->data;
+
+	i = 0;
+	prefix = BASE_PREFIX;
+
+	while (trace_check_bounds(tb, p) && p < tb->tail) {
+		e = p;
+
+		trace_print_entry(e, i, &prefix);
+
+		i++;
+		p = (void *)e + sizeof(*e) + e->length;
+	}
+}
+
+void trace_print_location(struct trace_buffer *tb)
+{
+	printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, tb);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
new file mode 100644
index 0000000..926458e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+
+#include "utils.h"
+
+#define TRACE_TYPE_REG		1
+#define TRACE_TYPE_COUNTER	2
+#define TRACE_TYPE_STRING	3
+#define TRACE_TYPE_INDENT	4
+#define TRACE_TYPE_OUTDENT	5
+
+struct trace_entry
+{
+	u8 type;
+	u8 length;
+	u8 data[0];
+};
+
+struct trace_buffer
+{
+	u64  size;
+	bool overflow;
+	void *tail;
+	u8   data[0];
+};
+
+struct trace_buffer *trace_buffer_allocate(u64 size);
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value);
+int trace_log_counter(struct trace_buffer *tb, u64 value);
+int trace_log_string(struct trace_buffer *tb, char *str);
+int trace_log_indent(struct trace_buffer *tb);
+int trace_log_outdent(struct trace_buffer *tb);
+void trace_buffer_print(struct trace_buffer *tb);
+void trace_print_location(struct trace_buffer *tb);
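+
+/*
+ * Typical usage, as a sketch (the size and contents are arbitrary):
+ *
+ *	struct trace_buffer *tb = trace_buffer_allocate(1024 * 1024);
+ *	if (tb) {
+ *		trace_log_string(tb, "ebb handler entry");
+ *		trace_log_reg(tb, SPRN_MMCR0, mfspr(SPRN_MMCR0));
+ *		trace_buffer_print(tb);
+ *	}
+ */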
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */
diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
index 2b2d11d..184b368 100644
--- a/tools/testing/selftests/powerpc/pmu/event.c
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -39,7 +39,13 @@
 	event_init_opts(e, config, PERF_TYPE_RAW, name);
 }
 
+void event_init(struct event *e, u64 config)
+{
+	event_init_opts(e, config, PERF_TYPE_RAW, "event");
+}
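+
+/*
+ * For example (a sketch; 0x1001e is the raw cycles event used elsewhere
+ * in these tests):
+ *
+ *	struct event event;
+ *	event_init(&event, 0x1001e);
+ *	FAIL_IF(event_open(&event));
+ */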
+
 #define PERF_CURRENT_PID	0
+#define PERF_NO_PID		-1
 #define PERF_NO_CPU		-1
 #define PERF_NO_GROUP		-1
 
@@ -59,6 +65,16 @@
 	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd);
 }
 
+int event_open_with_pid(struct event *e, pid_t pid)
+{
+	return event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+int event_open_with_cpu(struct event *e, int cpu)
+{
+	return event_open_with_options(e, PERF_NO_PID, cpu, PERF_NO_GROUP);
+}
+
 int event_open(struct event *e)
 {
 	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP);
@@ -69,6 +85,16 @@
 	close(e->fd);
 }
 
+int event_enable(struct event *e)
+{
+	return ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+}
+
+int event_disable(struct event *e)
+{
+	return ioctl(e->fd, PERF_EVENT_IOC_DISABLE);
+}
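+
+/*
+ * For example, counting on one CPU rather than in the current task
+ * (a sketch, assuming an already-initialised struct event):
+ *
+ *	FAIL_IF(event_open_with_cpu(&event, 0));
+ *	FAIL_IF(event_enable(&event));
+ *	(run the workload)
+ *	FAIL_IF(event_disable(&event));
+ *	FAIL_IF(event_read(&event));
+ */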
+
 int event_reset(struct event *e)
 {
 	return ioctl(e->fd, PERF_EVENT_IOC_RESET);
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index e699319..a0ea6b1 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -29,8 +29,12 @@
 void event_init_opts(struct event *e, u64 config, int type, char *name);
 int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
 int event_open_with_group(struct event *e, int group_fd);
+int event_open_with_pid(struct event *e, pid_t pid);
+int event_open_with_cpu(struct event *e, int cpu);
 int event_open(struct event *e);
 void event_close(struct event *e);
+int event_enable(struct event *e);
+int event_disable(struct event *e);
 int event_reset(struct event *e);
 int event_read(struct event *e);
 void event_report_justified(struct event *e, int name_width, int result_width);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
new file mode 100644
index 0000000..0f6a473
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <errno.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+#include "lib.h"
+
+
+int pick_online_cpu(void)
+{
+	cpu_set_t mask;
+	int cpu;
+
+	CPU_ZERO(&mask);
+
+	if (sched_getaffinity(0, sizeof(mask), &mask)) {
+		perror("sched_getaffinity");
+		return -1;
+	}
+
+	/* We prefer a primary thread, but skip 0 */
+	for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
+		if (CPU_ISSET(cpu, &mask))
+			return cpu;
+
+	/* Search for anything, but in reverse */
+	for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
+		if (CPU_ISSET(cpu, &mask))
+			return cpu;
+
+	printf("No cpus in affinity mask?!\n");
+	return -1;
+}
+
+int bind_to_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	printf("Binding to cpu %d\n", cpu);
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+
+	return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+#define PARENT_TOKEN	0xAA
+#define CHILD_TOKEN	0x55
+
+int sync_with_child(union pipe read_pipe, union pipe write_pipe)
+{
+	char c = PARENT_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+	FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+	if (c != CHILD_TOKEN) /* sometimes expected */
+		return 1;
+
+	return 0;
+}
+
+int wait_for_parent(union pipe read_pipe)
+{
+	char c;
+
+	FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+	FAIL_IF(c != PARENT_TOKEN);
+
+	return 0;
+}
+
+int notify_parent(union pipe write_pipe)
+{
+	char c = CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+	return 0;
+}
+
+int notify_parent_of_error(union pipe write_pipe)
+{
+	char c = ~CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+	return 0;
+}
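+
+/*
+ * The handshake the helpers above implement, as a sketch:
+ *
+ *	parent: sync_with_child()  - writes PARENT_TOKEN, then blocks
+ *	child:  wait_for_parent()  - returns once PARENT_TOKEN arrives
+ *	child:  notify_parent()    - replies with CHILD_TOKEN, or its
+ *	                             complement via notify_parent_of_error()
+ */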
+
+int wait_for_child(pid_t child_pid)
+{
+	int rc;
+
+	if (waitpid(child_pid, &rc, 0) == -1) {
+		perror("waitpid");
+		return 1;
+	}
+
+	if (WIFEXITED(rc))
+		rc = WEXITSTATUS(rc);
+	else
+		rc = 1; /* Signal or other */
+
+	return rc;
+}
+
+int kill_child_and_wait(pid_t child_pid)
+{
+	kill(child_pid, SIGTERM);
+
+	return wait_for_child(child_pid);
+}
+
+static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
+{
+	volatile int i = 0;
+
+	/*
+	 * We are just here to eat cpu and die. So make sure we can be killed,
+	 * and also don't do any custom SIGTERM handling.
+	 */
+	signal(SIGTERM, SIG_DFL);
+
+	notify_parent(write_pipe);
+	wait_for_parent(read_pipe);
+
+	/* Soak up cpu forever */
+	while (1) i++;
+
+	return 0;
+}
+
+int eat_cpu(int (test_function)(void))
+{
+	union pipe read_pipe, write_pipe;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	if (pipe(read_pipe.fds) == -1)
+		return -1;
+
+	if (pipe(write_pipe.fds) == -1)
+		return -1;
+
+	pid = fork();
+	if (pid == 0)
+		exit(eat_cpu_child(write_pipe, read_pipe));
+
+	if (sync_with_child(read_pipe, write_pipe)) {
+		rc = -1;
+		goto out;
+	}
+
+	printf("main test running as pid %d\n", getpid());
+
+	rc = test_function();
+out:
+	kill(pid, SIGKILL);
+
+	return rc;
+}
+
+struct addr_range libc, vdso;
+
+int parse_proc_maps(void)
+{
+	char execute, name[128];
+	uint64_t start, end;
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	do {
+		/* This skips lines with no executable, which is what we want */
+		rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n",
+			    &start, &end, &execute, name);
+		if (rc <= 0)
+			break;
+
+		if (execute != 'x')
+			continue;
+
+		if (strstr(name, "libc")) {
+			libc.first = start;
+			libc.last = end - 1;
+		} else if (strstr(name, "[vdso]")) {
+			vdso.first = start;
+			vdso.last = end - 1;
+		}
+	} while (1);
+
+	fclose(f);
+
+	return 0;
+}
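+
+/*
+ * For reference, a typical /proc/self/maps line the fscanf() above is
+ * written against (start-end perms offset dev inode name):
+ *
+ *	3fff7c000000-3fff7c200000 r-xp 00000000 08:02 1234  /lib64/libc-2.19.so
+ */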
+
+#define PARANOID_PATH	"/proc/sys/kernel/perf_event_paranoid"
+
+/* Returns true if perf_event_paranoid is >= level, ie. the requirement fails */
+bool require_paranoia_below(int level)
+{
+	unsigned long current;
+	char *end, buf[16];
+	FILE *f;
+	int rc;
+
+	rc = -1;
+
+	f = fopen(PARANOID_PATH, "r");
+	if (!f) {
+		perror("fopen");
+		goto out;
+	}
+
+	if (!fgets(buf, sizeof(buf), f)) {
+		printf("Couldn't read " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	current = strtoul(buf, &end, 10);
+
+	if (end == buf) {
+		printf("Couldn't parse " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	if (current >= level)
+		goto out;
+
+	rc = 0;
+out_close:
+	fclose(f);
+out:
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
new file mode 100644
index 0000000..ca5d72a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
+#define __SELFTESTS_POWERPC_PMU_LIB_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+union pipe {
+	struct {
+		int read_fd;
+		int write_fd;
+	};
+	int fds[2];
+};
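+
+/*
+ * pipe(2) fills fds[], while the anonymous struct gives each end a
+ * readable name, e.g. (a sketch):
+ *
+ *	union pipe p;
+ *	if (pipe(p.fds) == 0)
+ *		write(p.write_fd, "x", 1);
+ *
+ * where p.write_fd aliases p.fds[1] and p.read_fd aliases p.fds[0].
+ */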
+
+extern int pick_online_cpu(void);
+extern int bind_to_cpu(int cpu);
+extern int kill_child_and_wait(pid_t child_pid);
+extern int wait_for_child(pid_t child_pid);
+extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
+extern int wait_for_parent(union pipe read_pipe);
+extern int notify_parent(union pipe write_pipe);
+extern int notify_parent_of_error(union pipe write_pipe);
+extern int eat_cpu(int (test_function)(void));
+extern bool require_paranoia_below(int level);
+
+struct addr_range {
+	uint64_t first, last;
+};
+
+extern struct addr_range libc, vdso;
+
+int parse_proc_maps(void);
+
+#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8820e3d..20c1f08 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -3,44 +3,41 @@
  * Licensed under GPLv2.
  */
 
+#include <ppc-asm.h>
+
 	.text
 
-	.global thirty_two_instruction_loop
-	.type .thirty_two_instruction_loop,@function
-	.section ".opd","aw",@progbits
-thirty_two_instruction_loop:
-	.quad .thirty_two_instruction_loop, .TOC.@tocbase, 0
-	.previous
-.thirty_two_instruction_loop:
-	cmpwi	%r3,0
+FUNC_START(thirty_two_instruction_loop)
+	cmpdi	r3,0
 	beqlr
-	addi	%r4,%r3,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1
-	addi	%r4,%r4,1	# 28 addi's
-	subi	%r3,%r3,1
-	b	.thirty_two_instruction_loop
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/subunit.h b/tools/testing/selftests/powerpc/subunit.h
index 98a2292..9c6c4e9 100644
--- a/tools/testing/selftests/powerpc/subunit.h
+++ b/tools/testing/selftests/powerpc/subunit.h
@@ -26,6 +26,11 @@
 	printf("error: %s\n", name);
 }
 
+static inline void test_skip(char *name)
+{
+	printf("skip: %s\n", name);
+}
+
 static inline void test_success(char *name)
 {
 	printf("success: %s\n", name);
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 0000000..51267f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -0,0 +1,15 @@
+PROGS := tm-resched-dscr
+
+all: $(PROGS)
+
+$(PROGS):
+
+run_tests: all
+	@-for PROG in $(PROGS); do \
+		./$$PROG; \
+	done;
+
+clean:
+	rm -f $(PROGS) *.o
+
+.PHONY: all run_tests clean
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
new file mode 100644
index 0000000..ee98e38
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -0,0 +1,90 @@
+/* Test context switching to see if the DSCR SPR is correctly preserved
+ * when within a transaction.
+ *
+ * Note: We assume that the DSCR has been left at the default value (0)
+ * for all CPUs.
+ *
+ * Method:
+ *
+ * Set a value into the DSCR.
+ *
+ * Start a transaction, and suspend it (*).
+ *
+ * Hard loop checking to see if the transaction has become doomed.
+ *
+ * Now that we *may* have been preempted, record the DSCR and TEXASR SPRs.
+ *
+ * If the abort was because of a context switch, check the DSCR value.
+ * Otherwise, try again.
+ *
+ * (*) If the transaction is not suspended we can't see the problem because
+ * the transaction abort handler will restore the DSCR to its checkpointed
+ * value before we regain control.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <asm/tm.h>
+
+#define TBEGIN          ".long 0x7C00051D ;"
+#define TEND            ".long 0x7C00055D ;"
+#define TCHECK          ".long 0x7C00059C ;"
+#define TSUSPEND        ".long 0x7C0005DD ;"
+#define TRESUME         ".long 0x7C2005DD ;"
+#define SPRN_TEXASR     0x82
+#define SPRN_DSCR       0x03
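+
+/*
+ * The TM instructions are hand-encoded as raw opcodes so the test
+ * builds even with assemblers that lack the ISA 2.07 transactional
+ * memory mnemonics (tbegin., tsuspend., tresume., tend.).
+ */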
+
+int main(void) {
+	uint64_t rv, dscr1 = 1, dscr2, texasr;
+
+	printf("Check DSCR TM context switch: ");
+	fflush(stdout);
+	for (;;) {
+		rv = 1;
+		asm __volatile__ (
+			/* set a known value into the DSCR */
+			"ld      3, %[dscr1];"
+			"mtspr   %[sprn_dscr], 3;"
+
+			/* start and suspend a transaction */
+			TBEGIN
+			"beq     1f;"
+			TSUSPEND
+
+			/* hard loop until the transaction becomes doomed */
+			"2: ;"
+			TCHECK
+			"bc      4, 0, 2b;"
+
+			/* record DSCR and TEXASR */
+			"mfspr   3, %[sprn_dscr];"
+			"std     3, %[dscr2];"
+			"mfspr   3, %[sprn_texasr];"
+			"std     3, %[texasr];"
+
+			TRESUME
+			TEND
+			"li      %[rv], 0;"
+			"1: ;"
+			: [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
+			: [dscr1]"m"(dscr1)
+			, [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
+			: "memory", "r3"
+		);
+		assert(rv); /* make sure the transaction aborted */
+		if ((texasr >> 56) != TM_CAUSE_RESCHED) {
+			putchar('.');
+			fflush(stdout);
+			continue;
+		}
+		if (dscr2 != dscr1) {
+			printf(" FAIL\n");
+			exit(EXIT_FAILURE);
+		} else {
+			printf(" OK\n");
+			exit(EXIT_SUCCESS);
+		}
+	}
+}
diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h
index 0de0644..a93777a 100644
--- a/tools/testing/selftests/powerpc/utils.h
+++ b/tools/testing/selftests/powerpc/utils.h
@@ -31,6 +31,18 @@
 	}							\
 } while (0)
 
+/* The test harness uses this, yes it's gross */
+#define MAGIC_SKIP_RETURN_VALUE	99
+
+#define SKIP_IF(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[SKIP] Test skipped on line %d\n", __LINE__);	\
+		return MAGIC_SKIP_RETURN_VALUE;			\
+	}							\
+} while (0)
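+
+/*
+ * For example (a sketch; have_feature() is a hypothetical check):
+ *
+ *	static int my_test(void)
+ *	{
+ *		SKIP_IF(!have_feature());
+ *		...
+ *		return 0;
+ *	}
+ */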
+
 #define _str(s) #s
 #define str(s) _str(s)
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c86be0f..4b6c01b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1714,11 +1714,11 @@
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
-	bool ret = false;
+	int ret = 0;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);