Merge tag 'usb-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb

Pull USB driver updates from Greg KH:
 "Here's the big USB (and PHY) driver patchset for 4.1-rc1.

  Everything here has been in linux-next, and the full details are below
  in the shortlog.  Nothing major, just the normal round of new
  drivers,api updates, and other changes, mostly in the USB gadget area,
  as usual"

* tag 'usb-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb: (252 commits)
  drivers/usb/core: devio.c: Removed an uneeded space before tab
  usb: dwc2: host: sleep USB_RESUME_TIMEOUT during resume
  usb: chipidea: debug: add low power mode check before print registers
  usb: chipidea: udc: bypass pullup DP when gadget connect in OTG fsm mode
  usb: core: hub: use new USB_RESUME_TIMEOUT
  usb: isp1760: hcd: use new USB_RESUME_TIMEOUT
  usb: dwc2: hcd: use new USB_RESUME_TIMEOUT
  usb: host: sl811: use new USB_RESUME_TIMEOUT
  usb: host: r8a66597: use new USB_RESUME_TIMEOUT
  usb: host: oxu210hp: use new USB_RESUME_TIMEOUT
  usb: host: fusbh200: use new USB_RESUME_TIMEOUT
  usb: host: fotg210: use new USB_RESUME_TIMEOUT
  usb: host: isp116x: use new USB_RESUME_TIMEOUT
  usb: musb: use new USB_RESUME_TIMEOUT
  usb: host: uhci: use new USB_RESUME_TIMEOUT
  usb: host: ehci: use new USB_RESUME_TIMEOUT
  usb: host: xhci: use new USB_RESUME_TIMEOUT
  usb: define a generic USB_RESUME_TIMEOUT macro
  usb: musb: dsps: fix build on i386 when COMPILE_TEST is set
  ehci-hub: use USB_DT_HUB
  ...
diff --git a/CREDITS b/CREDITS
index 96935df..843e176 100644
--- a/CREDITS
+++ b/CREDITS
@@ -187,6 +187,10 @@
 E: balasub@cis.ohio-state.edu
 D: Wrote SYS V IPC (part of standard kernel since 0.99.10)
 
+N: Chris Ball
+E: chris@printf.net
+D: Former maintainer of the MMC/SD/SDIO subsystem.
+
 N: Dario Ballabio
 E: ballabio_dario@emc.com
 E: dario.ballabio@tiscalinet.it
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 9518006..123881f 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -564,14 +564,14 @@
 8. Vendor and device identifications
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-One is not required to add new device ids to include/linux/pci_ids.h.
-Please add PCI_VENDOR_ID_xxx for vendors and a hex constant for device ids.
+Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
+are shared across multiple drivers.  You can add private definitions in
+your driver if they're helpful, or just use plain hex constants.
 
-PCI_VENDOR_ID_xxx constants are re-used. The device ids are arbitrary
-hex numbers (vendor controlled) and normally used only in a single
-location, the pci_device_id table.
+The device IDs are arbitrary hex numbers (vendor controlled) and normally used
+only in a single location, the pci_device_id table.
 
-Please DO submit new vendor/device ids to pciids.sourceforge.net project.
+Please DO submit new vendor/device IDs to http://pciids.sourceforge.net/.
 
 
 
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index f51861b..e550c8b 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -1,129 +1,177 @@
 			APEI Error INJection
 			~~~~~~~~~~~~~~~~~~~~
 
-EINJ provides a hardware error injection mechanism
-It is very useful for debugging and testing of other APEI and RAS features.
+EINJ provides a hardware error injection mechanism. It is very useful
+for debugging and testing APEI and RAS features in general.
 
-To use EINJ, make sure the following are enabled in your kernel
+You need to check whether your BIOS supports EINJ first. For that, look
+for early boot messages similar to this one:
+
+ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL           00000001 INTL 00000001)
+
+which shows that the BIOS is exposing an EINJ table - it is the
+mechanism through which the injection is done.
+
+Alternatively, look in /sys/firmware/acpi/tables for an "EINJ" file,
+which is a different representation of the same thing.
+
+It doesn't necessarily mean that EINJ is not supported if those above
+don't exist: before you give up, go into BIOS setup to see if the BIOS
+has an option to enable error injection. Look for something called WHEA
+or similar. Often, you need to enable an ACPI5 support option prior, in
+order to see the APEI,EINJ,... functionality supported and exposed by
+the BIOS menu.
+
+To use EINJ, make sure the following are options enabled in your kernel
 configuration:
 
 CONFIG_DEBUG_FS
 CONFIG_ACPI_APEI
 CONFIG_ACPI_APEI_EINJ
 
-The user interface of EINJ is debug file system, under the
-directory apei/einj. The following files are provided.
+The EINJ user interface is in <debugfs mount point>/apei/einj.
+
+The following files belong to it:
 
 - available_error_type
-  Reading this file returns the error injection capability of the
-  platform, that is, which error types are supported. The error type
-  definition is as follow, the left field is the error type value, the
-  right field is error description.
 
-    0x00000001	Processor Correctable
-    0x00000002	Processor Uncorrectable non-fatal
-    0x00000004	Processor Uncorrectable fatal
-    0x00000008  Memory Correctable
-    0x00000010  Memory Uncorrectable non-fatal
-    0x00000020  Memory Uncorrectable fatal
-    0x00000040	PCI Express Correctable
-    0x00000080	PCI Express Uncorrectable fatal
-    0x00000100	PCI Express Uncorrectable non-fatal
-    0x00000200	Platform Correctable
-    0x00000400	Platform Uncorrectable non-fatal
-    0x00000800	Platform Uncorrectable fatal
+  This file shows which error types are supported:
 
-  The format of file contents are as above, except there are only the
-  available error type lines.
+  Error Type Value	Error Description
+  ================	=================
+  0x00000001		Processor Correctable
+  0x00000002		Processor Uncorrectable non-fatal
+  0x00000004		Processor Uncorrectable fatal
+  0x00000008		Memory Correctable
+  0x00000010		Memory Uncorrectable non-fatal
+  0x00000020		Memory Uncorrectable fatal
+  0x00000040		PCI Express Correctable
+  0x00000080		PCI Express Uncorrectable fatal
+  0x00000100		PCI Express Uncorrectable non-fatal
+  0x00000200		Platform Correctable
+  0x00000400		Platform Uncorrectable non-fatal
+  0x00000800		Platform Uncorrectable fatal
+
+  The format of the file contents are as above, except present are only
+  the available error types.
 
 - error_type
-  This file is used to set the error type value. The error type value
-  is defined in "available_error_type" description.
+
+  Set the value of the error type being injected. Possible error types
+  are defined in the file available_error_type above.
 
 - error_inject
-  Write any integer to this file to trigger the error
-  injection. Before this, please specify all necessary error
-  parameters.
+
+  Write any integer to this file to trigger the error injection. Make
+  sure you have specified all necessary error parameters, i.e. this
+  write should be the last step when injecting errors.
 
 - flags
-  Present for kernel version 3.13 and above. Used to specify which
-  of param{1..4} are valid and should be used by BIOS during injection.
-  Value is a bitmask as specified in ACPI5.0 spec for the
+
+  Present for kernel versions 3.13 and above. Used to specify which
+  of param{1..4} are valid and should be used by the firmware during
+  injection. Value is a bitmask as specified in ACPI5.0 spec for the
   SET_ERROR_TYPE_WITH_ADDRESS data structure:
-	Bit 0 - Processor APIC field valid (see param3 below)
-	Bit 1 - Memory address and mask valid (param1 and param2)
-	Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
-  If set to zero, legacy behaviour is used where the type of injection
-  specifies just one bit set, and param1 is multiplexed.
+
+	Bit 0 - Processor APIC field valid (see param3 below).
+	Bit 1 - Memory address and mask valid (param1 and param2).
+	Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below).
+
+  If set to zero, legacy behavior is mimicked where the type of
+  injection specifies just one bit set, and param1 is multiplexed.
 
 - param1
-  This file is used to set the first error parameter value. Effect of
-  parameter depends on error_type specified. For example, if error
-  type is memory related type, the param1 should be a valid physical
-  memory address. [Unless "flag" is set - see above]
+
+  This file is used to set the first error parameter value. Its effect
+  depends on the error type specified in error_type. For example, if
+  error type is memory related type, the param1 should be a valid
+  physical memory address. [Unless "flag" is set - see above]
 
 - param2
-  This file is used to set the second error parameter value. Effect of
-  parameter depends on error_type specified. For example, if error
-  type is memory related type, the param2 should be a physical memory
-  address mask. Linux requires page or narrower granularity, say,
-  0xfffffffffffff000.
+
+  Same use as param1 above. For example, if error type is of memory
+  related type, then param2 should be a physical memory address mask.
+  Linux requires page or narrower granularity, say, 0xfffffffffffff000.
 
 - param3
-  Used when the 0x1 bit is set in "flag" to specify the APIC id
+
+  Used when the 0x1 bit is set in "flags" to specify the APIC id
 
 - param4
-  Used when the 0x4 bit is set in "flag" to specify target PCIe device
+  Used when the 0x4 bit is set in "flags" to specify target PCIe device
 
 - notrigger
-  The EINJ mechanism is a two step process. First inject the error, then
-  perform some actions to trigger it. Setting "notrigger" to 1 skips the
-  trigger phase, which *may* allow the user to cause the error in some other
-  context by a simple access to the cpu, memory location, or device that is
-  the target of the error injection. Whether this actually works depends
-  on what operations the BIOS actually includes in the trigger phase.
 
-BIOS versions based in the ACPI 4.0 specification have limited options
-to control where the errors are injected.  Your BIOS may support an
-extension (enabled with the param_extension=1 module parameter, or
-boot command line einj.param_extension=1). This allows the address
-and mask for memory injections to be specified by the param1 and
-param2 files in apei/einj.
+  The error injection mechanism is a two-step process. First inject the
+  error, then perform some actions to trigger it. Setting "notrigger"
+  to 1 skips the trigger phase, which *may* allow the user to cause the
+  error in some other context by a simple access to the CPU, memory
+  location, or device that is the target of the error injection. Whether
+  this actually works depends on what operations the BIOS actually
+  includes in the trigger phase.
 
-BIOS versions using the ACPI 5.0 specification have more control over
-the target of the injection. For processor related errors (type 0x1,
-0x2 and 0x4) the APICID of the target should be provided using the
-param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20)
-the address is set using param1 with a mask in param2 (0x0 is equivalent
-to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the
-segment, bus, device and function are specified using param1:
+BIOS versions based on the ACPI 4.0 specification have limited options
+in controlling where the errors are injected. Your BIOS may support an
+extension (enabled with the param_extension=1 module parameter, or boot
+command line einj.param_extension=1). This allows the address and mask
+for memory injections to be specified by the param1 and param2 files in
+apei/einj.
+
+BIOS versions based on the ACPI 5.0 specification have more control over
+the target of the injection. For processor-related errors (type 0x1, 0x2
+and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and
+param2 for bit 1) so that you have more information added to the error
+signature being injected. The actual data passed is this:
+
+	memory_address = param1;
+	memory_address_range = param2;
+	apicid = param3;
+	pcie_sbdf = param4;
+
+For memory errors (type 0x8, 0x10 and 0x20) the address is set using
+param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI
+express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and
+function are specified using param1:
 
          31     24 23    16 15    11 10      8  7        0
 	+-------------------------------------------------+
 	| segment |   bus  | device | function | reserved |
 	+-------------------------------------------------+
 
-An ACPI 5.0 BIOS may also allow vendor specific errors to be injected.
+Anyway, you get the idea, if there's doubt just take a look at the code
+in drivers/acpi/apei/einj.c.
+
+An ACPI 5.0 BIOS may also allow vendor-specific errors to be injected.
 In this case a file named vendor will contain identifying information
 from the BIOS that hopefully will allow an application wishing to use
-the vendor specific extension to tell that they are running on a BIOS
+the vendor-specific extension to tell that they are running on a BIOS
 that supports it. All vendor extensions have the 0x80000000 bit set in
 error_type. A file vendor_flags controls the interpretation of param1
 and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor
 documentation for details (and expect changes to this API if vendors
 creativity in using this feature expands beyond our expectations).
 
-Example:
+
+An error injection example:
+
 # cd /sys/kernel/debug/apei/einj
 # cat available_error_type		# See which errors can be injected
 0x00000002	Processor Uncorrectable non-fatal
 0x00000008	Memory Correctable
 0x00000010	Memory Uncorrectable non-fatal
 # echo 0x12345000 > param1		# Set memory address for injection
-# echo 0xfffffffffffff000 > param2	# Mask - anywhere in this page
+# echo $((-1 << 12)) > param2		# Mask 0xfffffffffffff000 - anywhere in this page
 # echo 0x8 > error_type			# Choose correctable memory error
 # echo 1 > error_inject			# Inject now
 
+You should see something like this in dmesg:
+
+[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
+[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
+[22715.834759] EDAC sbridge MC3: TSC 0
+[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
+[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
+[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 -  area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5 and ACPI 5.0, section 18.6.
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index f2235a1..fdf7dff 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -392,8 +392,10 @@
 than one big one, but doing so means that overloads in one of the
 two domains won't be load balanced to the other one.
 
-By default, there is one sched domain covering all CPUs, except those
-marked isolated using the kernel boot time "isolcpus=" argument.
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
 
 This default load balancing across all CPUs is not well suited for
 the following two situations:
@@ -465,6 +467,10 @@
 constrained to some subset of the CPUs allowed to them, for lack of
 load balancing to the other CPUs.
 
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
 1.7.1 sched_load_balance implementation details.
 ------------------------------------------------
 
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
new file mode 100644
index 0000000..44aa3c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
@@ -0,0 +1,14 @@
+Freescale Vybrid Miscellaneous System Control - CPU Configuration
+
+The MSCM IP contains multiple sub modules, this binding describes the first
+block of registers which contains CPU configuration information.
+
+Required properties:
+- compatible:	"fsl,vf610-mscm-cpucfg", "syscon"
+- reg:		the register range of the MSCM CPU configuration registers
+
+Example:
+	mscm_cpucfg: cpucfg@40001000 {
+		compatible = "fsl,vf610-mscm-cpucfg", "syscon";
+		reg = <0x40001000 0x800>;
+	}
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
new file mode 100644
index 0000000..669808b2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
@@ -0,0 +1,33 @@
+Freescale Vybrid Miscellaneous System Control - Interrupt Router
+
+The MSCM IP contains multiple sub modules, this binding describes the second
+block of registers which control the interrupt router. The interrupt router
+allows to configure the recipient of each peripheral interrupt. Furthermore
+it controls the directed processor interrupts. The module is available in all
+Vybrid SoC's but is only really useful in dual core configurations (VF6xx
+which comes with a Cortex-A5/Cortex-M4 combination).
+
+Required properties:
+- compatible:		"fsl,vf610-mscm-ir"
+- reg:			the register range of the MSCM Interrupt Router
+- fsl,cpucfg:		The handle to the MSCM CPU configuration node, required
+			to get the current CPU ID
+- interrupt-controller:	Identifies the node as an interrupt controller
+- #interrupt-cells:	Two cells, interrupt number and cells.
+			The hardware interrupt number according to interrupt
+			assignment of the interrupt router is required.
+			Flags get passed only when using GIC as parent. Flags
+			encoding as documented by the GIC bindings.
+- interrupt-parent:	Should be the phandle for the interrupt controller of
+			the CPU the device tree is intended to be used on. This
+			is either the node of the GIC or NVIC controller.
+
+Example:
+	mscm_ir: interrupt-controller@40001800 {
+		compatible = "fsl,vf610-mscm-ir";
+		reg = <0x40001800 0x400>;
+		fsl,cpucfg = <&mscm_cpucfg>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		interrupt-parent = <&intc>;
+	}
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
index c97484b..1e0d212 100644
--- a/Documentation/devicetree/bindings/arm/gic.txt
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -56,11 +56,6 @@
   regions, used when the GIC doesn't have banked registers. The offset is
   cpu-offset * cpu-nr.
 
-- arm,routable-irqs : Total number of gic irq inputs which are not directly
-		  connected from the peripherals, but are routed dynamically
-		  by a crossbar/multiplexer preceding the GIC. The GIC irq
-		  input line is assigned dynamically when the corresponding
-		  peripheral's crossbar line is mapped.
 Example:
 
 	intc: interrupt-controller@fff11000 {
@@ -68,7 +63,6 @@
 		#interrupt-cells = <3>;
 		#address-cells = <1>;
 		interrupt-controller;
-		arm,routable-irqs = <160>;
 		reg = <0xfff11000 0x1000>,
 		      <0xfff10100 0x100>;
 	};
diff --git a/Documentation/devicetree/bindings/arm/omap/crossbar.txt b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
index 4139db3..a9b28d7 100644
--- a/Documentation/devicetree/bindings/arm/omap/crossbar.txt
+++ b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
@@ -9,7 +9,9 @@
 Required properties:
 - compatible : Should be "ti,irq-crossbar"
 - reg: Base address and the size of the crossbar registers.
-- ti,max-irqs: Total number of irqs available at the interrupt controller.
+- interrupt-controller: indicates that this block is an interrupt controller.
+- interrupt-parent: the interrupt controller this block is connected to.
+- ti,max-irqs: Total number of irqs available at the parent interrupt controller.
 - ti,max-crossbar-sources: Maximum number of crossbar sources that can be routed.
 - ti,reg-size: Size of a individual register in bytes. Every individual
 	    register is assumed to be of same size. Valid sizes are 1, 2, 4.
@@ -27,13 +29,13 @@
   when the interrupt controller irq is unused (when not provided, default is 0)
 
 Examples:
-		crossbar_mpu: @4a020000 {
+		crossbar_mpu: crossbar@4a002a48 {
 			compatible = "ti,irq-crossbar";
 			reg = <0x4a002a48 0x130>;
 			ti,max-irqs = <160>;
 			ti,max-crossbar-sources = <400>;
 			ti,reg-size = <2>;
-			ti,irqs-reserved = <0 1 2 3 5 6 131 132 139 140>;
+			ti,irqs-reserved = <0 1 2 3 5 6 131 132>;
 			ti,irqs-skip = <10 133 139 140>;
 		};
 
@@ -44,10 +46,6 @@
 
 An interrupt consumer on an SoC using crossbar will use:
 	interrupts = <GIC_SPI request_number interrupt_level>
-When the request number is between 0 to that described by
-"ti,max-crossbar-sources", it is assumed to be a crossbar mapping. If the
-request_number is greater than "ti,max-crossbar-sources", then it is mapped as a
-quirky hardware mapping direct to GIC.
 
 Example:
 	device_x@0x4a023000 {
@@ -55,9 +53,3 @@
 		interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
 		...
 	};
-
-	device_y@0x4a033000 {
-		/* Direct mapped GIC SPI 1 used */
-		interrupts = <GIC_SPI DIRECT_IRQ(1) IRQ_TYPE_LEVEL_HIGH>;
-		...
-	};
diff --git a/Documentation/devicetree/bindings/arm/samsung/pmu.txt b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
index 67b2113..2d6356d 100644
--- a/Documentation/devicetree/bindings/arm/samsung/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
@@ -29,10 +29,27 @@
  - clocks : list of phandles and specifiers to all input clocks listed in
 		clock-names property.
 
+Optional properties:
+
+Some PMUs are capable of behaving as an interrupt controller (mostly
+to wake up a suspended PMU). In which case, they can have the
+following properties:
+
+- interrupt-controller: indicate that said PMU is an interrupt controller
+
+- #interrupt-cells: must be identical to the that of the parent interrupt
+  controller.
+
+- interrupt-parent: a phandle indicating which interrupt controller
+  this PMU signals interrupts to.
+
 Example :
 pmu_system_controller: system-controller@10040000 {
 	compatible = "samsung,exynos5250-pmu", "syscon";
 	reg = <0x10040000 0x5000>;
+	interrupt-controller;
+	#interrupt-cells = <3>;
+	interrupt-parent = <&gic>;
 	#clock-cells = <1>;
 	clock-names = "clkout0", "clkout1", "clkout2", "clkout3",
 			"clkout4", "clkout8", "clkout9";
diff --git a/Documentation/devicetree/bindings/ata/ahci-st.txt b/Documentation/devicetree/bindings/ata/ahci-st.txt
index 0574a77..e1d01df 100644
--- a/Documentation/devicetree/bindings/ata/ahci-st.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-st.txt
@@ -3,29 +3,48 @@
 This binding describes a SATA device.
 
 Required properties:
- - compatible	   : Must be "st,sti-ahci"
+ - compatible	   : Must be "st,ahci"
  - reg		   : Physical base addresses and length of register sets
  - interrupts	   : Interrupt associated with the SATA device
  - interrupt-names :   Associated name must be; "hostc"
- - resets	   : The power-down and soft-reset lines of SATA IP
- - reset-names	   :   Associated names must be; "pwr-dwn" and "sw-rst"
  - clocks	   : The phandle for the clock
  - clock-names	   :   Associated name must be; "ahci_clk"
- - phys		   : The phandle for the PHY device
+ - phys		   : The phandle for the PHY port
  - phy-names	   :   Associated name must be; "ahci_phy"
 
+Optional properties:
+ - resets	   : The power-down, soft-reset and power-reset lines of SATA IP
+ - reset-names	   :   Associated names must be; "pwr-dwn", "sw-rst" and "pwr-rst"
+
 Example:
 
+	/* Example for stih416 */
 	sata0: sata@fe380000 {
-		compatible      = "st,sti-ahci";
-		reg             = <0xfe380000 0x1000>;
-		interrupts      = <GIC_SPI 157 IRQ_TYPE_NONE>;
-		interrupt-names = "hostc";
-		phys	        = <&miphy365x_phy MIPHY_PORT_0 MIPHY_TYPE_SATA>;
-		phy-names       = "ahci_phy";
-		resets	        = <&powerdown STIH416_SATA0_POWERDOWN>,
+		compatible	= "st,ahci";
+		reg		= <0xfe380000 0x1000>;
+		interrupts	= <GIC_SPI 157 IRQ_TYPE_NONE>;
+		interrupt-names	= "hostc";
+		phys		= <&phy_port0 PHY_TYPE_SATA>;
+		phy-names	= "ahci_phy";
+		resets		= <&powerdown STIH416_SATA0_POWERDOWN>,
 				  <&softreset STIH416_SATA0_SOFTRESET>;
-		reset-names     = "pwr-dwn", "sw-rst";
-		clocks	        = <&clk_s_a0_ls CLK_ICN_REG>;
-		clock-names     = "ahci_clk";
+		reset-names	= "pwr-dwn", "sw-rst";
+		clocks		= <&clk_s_a0_ls CLK_ICN_REG>;
+		clock-names	= "ahci_clk";
+	};
+
+	/* Example for stih407 family silicon */
+	sata0: sata@9b20000 {
+		compatible	= "st,ahci";
+		reg		= <0x9b20000 0x1000>;
+		interrupts	= <GIC_SPI 159 IRQ_TYPE_NONE>;
+		interrupt-names	= "hostc";
+		phys		= <&phy_port0 PHY_TYPE_SATA>;
+		phy-names	= "ahci_phy";
+		resets		= <&powerdown STIH407_SATA0_POWERDOWN>,
+				  <&softreset STIH407_SATA0_SOFTRESET>,
+				  <&softreset STIH407_SATA0_PWR_SOFTRESET>;
+		reset-names	= "pwr-dwn", "sw-rst", "pwr-rst";
+		clocks		= <&clk_s_c0_flexgen CLK_ICN_REG>;
+		clock-names	= "ahci_clk";
 	};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-fan.txt b/Documentation/devicetree/bindings/gpio/gpio-fan.txt
index 2dd457a..439a743 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-fan.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-fan.txt
@@ -2,15 +2,20 @@
 
 Required properties:
 - compatible : "gpio-fan"
+
+Optional properties:
 - gpios: Specifies the pins that map to bits in the control value,
   ordered MSB-->LSB.
 - gpio-fan,speed-map: A mapping of possible fan RPM speeds and the
   control value that should be set to achieve them. This array
   must have the RPM values in ascending order.
-
-Optional properties:
 - alarm-gpios: This pin going active indicates something is wrong with
   the fan, and a udev event will be fired.
+- cooling-cells: If used as a cooling device, must be <2>
+  Also see: Documentation/devicetree/bindings/thermal/thermal.txt
+  min and max states are derived from the speed-map of the fan.
+
+Note: At least one the "gpios" or "alarm-gpios" properties must be set.
 
 Examples:
 
@@ -23,3 +28,13 @@
 				      6000 2>;
 		alarm-gpios = <&gpio1 15 1>;
 	};
+	gpio_fan_cool: gpio_fan {
+		compatible = "gpio-fan";
+		gpios = <&gpio2 14 1
+			 &gpio2 13 1>;
+		gpio-fan,speed-map =	<0    0>,
+					<3000 1>,
+					<6000 2>;
+		alarm-gpios = <&gpio2 15 1>;
+		#cooling-cells = <2>; /* min followed by max */
+	};
diff --git a/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt b/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt
new file mode 100644
index 0000000..c07228d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt
@@ -0,0 +1,16 @@
+Dialog Semiconductor DA9150 IIO GPADC bindings
+
+Required properties:
+- compatible: "dlg,da9150-gpadc" for DA9150 IIO GPADC
+- #io-channel-cells: Should be set to <1>
+  (See Documentation/devicetree/bindings/iio/iio-bindings.txt for further info)
+
+For further information on GPADC channels, see device datasheet.
+
+
+Example:
+
+	gpadc: da9150-gpadc {
+		compatible = "dlg,da9150-gpadc";
+		#io-channel-cells = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt b/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt
new file mode 100644
index 0000000..1099fe0
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt
@@ -0,0 +1,43 @@
+NVIDIA Legacy Interrupt Controller
+
+All Tegra SoCs contain a legacy interrupt controller that routes
+interrupts to the GIC, and also serves as a wakeup source. It is also
+referred to as "ictlr", hence the name of the binding.
+
+The HW block exposes a number of interrupt controllers, each
+implementing a set of 32 interrupts.
+
+Required properties:
+
+- compatible : should be: "nvidia,tegra<chip>-ictlr". The LIC on
+  subsequent SoCs remained backwards-compatible with Tegra30, so on
+  Tegra generations later than Tegra30 the compatible value should
+  include "nvidia,tegra30-ictlr".	
+- reg : Specifies base physical address and size of the registers.
+  Each controller must be described separately (Tegra20 has 4 of them,
+  whereas Tegra30 and later have 5"  
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value must be 3.
+- interrupt-parent : a phandle to the GIC these interrupts are routed
+  to.
+
+Notes:
+
+- Because this HW ultimately routes interrupts to the GIC, the
+  interrupt specifier must be that of the GIC.
+- Only SPIs can use the ictlr as an interrupt parent. SGIs and PPIs
+  are explicitly forbidden.
+
+Example:
+
+	ictlr: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra20-ictlr", "nvidia,tegra-ictlr";
+		reg = <0x60004000 64>,
+		      <0x60004100 64>,
+		      <0x60004200 64>,
+		      <0x60004300 64>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
+	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
index 1a88e62..63633bd 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
@@ -4,7 +4,7 @@
 
 - compatible: has to be "renesas,irqc-<soctype>", "renesas,irqc" as fallback.
   Examples with soctypes are:
-    - "renesas,irqc-r8a73a4" (R-Mobile AP6)
+    - "renesas,irqc-r8a73a4" (R-Mobile APE6)
     - "renesas,irqc-r8a7790" (R-Car H2)
     - "renesas,irqc-r8a7791" (R-Car M2-W)
     - "renesas,irqc-r8a7792" (R-Car V2H)
@@ -12,6 +12,7 @@
     - "renesas,irqc-r8a7794" (R-Car E2)
 - #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
   interrupts.txt in this directory
+- clocks: Must contain a reference to the functional clock.
 
 Optional properties:
 
@@ -29,4 +30,5 @@
 			     <0 1 IRQ_TYPE_LEVEL_HIGH>,
 			     <0 2 IRQ_TYPE_LEVEL_HIGH>,
 			     <0 3 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp4_clks R8A7790_CLK_IRQC>;
 	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
new file mode 100644
index 0000000..ced6014
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
@@ -0,0 +1,35 @@
+STMicroelectronics STi System Configuration Controlled IRQs
+-----------------------------------------------------------
+
+On STi based systems; External, CTI (Core Sight), PMU (Performance Management),
+and PL310 L2 Cache IRQs are controlled using System Configuration registers.
+This driver is used to unmask them prior to use.
+
+Required properties:
+- compatible	: Should be set to one of:
+			"st,stih415-irq-syscfg"
+			"st,stih416-irq-syscfg"
+			"st,stih407-irq-syscfg"
+			"st,stid127-irq-syscfg"
+- st,syscfg	: Phandle to Cortex-A9 IRQ system config registers
+- st,irq-device	: Array of IRQs to enable - should be 2 in length
+- st,fiq-device	: Array of FIQs to enable - should be 2 in length
+
+Optional properties:
+- st,invert-ext	: External IRQs can be inverted at will.  This property inverts
+		  these IRQs using bitwise logic.  A number of defines have been
+		  provided for convenience:
+			ST_IRQ_SYSCFG_EXT_1_INV
+			ST_IRQ_SYSCFG_EXT_2_INV
+			ST_IRQ_SYSCFG_EXT_3_INV
+Example:
+
+irq-syscfg {
+	compatible    = "st,stih416-irq-syscfg";
+	st,syscfg     = <&syscfg_cpu>;
+	st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
+			<ST_IRQ_SYSCFG_PMU_1>;
+	st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
+			<ST_IRQ_SYSCFG_DISABLED>;
+	st,invert-ext = <(ST_IRQ_SYSCFG_EXT_1_INV | ST_IRQ_SYSCFG_EXT_3_INV)>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu b/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu
new file mode 100644
index 0000000..43effa0
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu
@@ -0,0 +1,33 @@
+TI OMAP4 Wake-up Generator
+
+All TI OMAP4/5 (and their derivatives) an interrupt controller that
+routes interrupts to the GIC, and also serves as a wakeup source. It
+is also referred to as "WUGEN-MPU", hence the name of the binding.
+
+Reguired properties:
+
+- compatible : should contain at least "ti,omap4-wugen-mpu" or
+  "ti,omap5-wugen-mpu"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value must be 3.
+- interrupt-parent : a phandle to the GIC these interrupts are routed
+  to.
+
+Notes:
+
+- Because this HW ultimately routes interrupts to the GIC, the
+  interrupt specifier must be that of the GIC.
+- Only SPIs can use the WUGEN as an interrupt parent. SGIs and PPIs
+  are explicitly forbiden.
+
+Example:
+
+       wakeupgen: interrupt-controller@48281000 {
+               compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               reg = <0x48281000 0x1000>;
+               interrupt-parent = <&gic>;
+       };
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
new file mode 100644
index 0000000..72cc9cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
@@ -0,0 +1,23 @@
+Broadcom IPROC SDHCI controller
+
+This file documents differences between the core properties described
+by mmc.txt and the properties that represent the IPROC SDHCI controller.
+
+Required properties:
+- compatible : Should be "brcm,sdhci-iproc-cygnus".
+- clocks : The clock feeding the SDHCI controller.
+
+Optional properties:
+  - sdhci,auto-cmd12: specifies that controller should use auto CMD12.
+
+Example:
+
+sdhci0: sdhci@0x18041000 {
+	compatible = "brcm,sdhci-iproc-cygnus";
+	reg = <0x18041000 0x100>;
+	interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&lcpll0_clks BCM_CYGNUS_LCPLL0_SDIO_CLK>;
+	bus-width = <4>;
+	sdhci,auto-cmd12;
+	no-1-8-v;
+};
diff --git a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
index ee4fc05..aad9844 100644
--- a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
@@ -36,6 +36,8 @@
   in transmit mode and CIU clock phase shift value in receive mode for double
   data rate mode operation. Refer notes below for the order of the cells and the
   valid values.
+* samsung,dw-mshc-hs400-timing: Specifies the value of CIU TX and RX clock phase
+  shift value for hs400 mode operation.
 
   Notes for the sdr-timing and ddr-timing values:
 
@@ -50,6 +52,9 @@
       - if CIU clock divider value is 0 (that is divide by 1), both tx and rx
         phase shift clocks should be 0.
 
+* samsung,read-strobe-delay: RCLK (Data strobe) delay to control HS400 mode
+  (Latency value for delay line in Read path)
+
 Required properties for a slot (Deprecated - Recommend to use one slot per host):
 
 * gpios: specifies a list of gpios used for command, clock and data bus. The
@@ -82,5 +87,7 @@
 		samsung,dw-mshc-ciu-div = <3>;
 		samsung,dw-mshc-sdr-timing = <2 3>;
 		samsung,dw-mshc-ddr-timing = <1 2>;
+		samsung,dw-mshc-hs400-timing = <0 2>;
+		samsung,read-strobe-delay = <90>;
 		bus-width = <8>;
 	};
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
index 9046ba06..415c557 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
@@ -17,6 +17,10 @@
   to select a proper data sampling window in case the clock quality is not good
   due to signal path is too long on the board. Please refer to eSDHC/uSDHC
   chapter, DLL (Delay Line) section in RM for details.
+- voltage-ranges : Specify the voltage range in case there are software
+  transparent level shifters on the outputs of the controller. Two cells are
+  required, first cell specifies minimum slot voltage (mV), second cell
+  specifies maximum slot voltage (mV). Several ranges could be specified.
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/mmc/mmc-card.txt b/Documentation/devicetree/bindings/mmc/mmc-card.txt
new file mode 100644
index 0000000..a70fcd6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-card.txt
@@ -0,0 +1,31 @@
+mmc-card / eMMC bindings
+------------------------
+
+This documents describes the devicetree bindings for a mmc-host controller
+child node describing a mmc-card / an eMMC, see "Use of Function subnodes"
+in mmc.txt
+
+Required properties:
+-compatible : Must be "mmc-card"
+-reg        : Must be <0>
+
+Optional properties:
+-broken-hpi : Use this to indicate that the mmc-card has a broken hpi
+              implementation, and that hpi should not be used
+
+Example:
+
+&mmc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc2_pins_a>;
+	vmmc-supply = <&reg_vcc3v3>;
+	bus-width = <8>;
+	non-removable;
+	status = "okay";
+
+	mmccard: mmccard@0 {
+		reg = <0>;
+		compatible = "mmc-card";
+		broken-hpi;
+	};
+};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
index 7527db4..18d950d 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -5,20 +5,62 @@
 used by the sdhci-st driver.
 
 Required properties:
-- compatible :  Must be "st,sdhci"
-- clock-names : Should be "mmc"
-                See: Documentation/devicetree/bindings/resource-names.txt
-- clocks :      Phandle of the clock used by the sdhci controler
-                See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+- compatible:		Must be "st,sdhci" and it can be compatible to "st,sdhci-stih407"
+			to set the internal glue logic used for configuring the MMC
+			subsystem (mmcss) inside the FlashSS (available in STiH407 SoC
+			family).
+
+- clock-names:		Should be "mmc".
+			See: Documentation/devicetree/bindings/resource-names.txt
+- clocks:		Phandle to the clock.
+			See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+- interrupts:		One mmc interrupt should be described here.
+- interrupt-names:	Should be "mmcirq".
+
+- pinctrl-names:	A pinctrl state names "default" must be defined.
+- pinctrl-0:		Phandle referencing pin configuration of the sd/emmc controller.
+			See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+- reg:			This must provide the host controller base address and it can also
+			contain the FlashSS Top register for TX/RX delay used by the driver
+			to configure DLL inside the flashSS, if so reg-names must also be
+			specified.
 
 Optional properties:
-- non-removable: non-removable slot
-                 See: Documentation/devicetree/bindings/mmc/mmc.txt
-- bus-width: Number of data lines
-                 See: Documentation/devicetree/bindings/mmc/mmc.txt
+- reg-names:		Should be "mmc" and "top-mmc-delay". "top-mmc-delay" is optional
+			for eMMC on stih407 family silicon to configure DLL inside FlashSS.
+
+- non-removable:	Non-removable slot. Also used for configuring mmcss in STiH407 SoC
+			family.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- bus-width:		Number of data lines.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- max-frequency: 	Can be 200MHz, 100Mz or 50MHz (default) and used for
+			configuring the CCONFIG3 in the mmcss.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- resets:		Phandle and reset specifier pair to softreset line of HC IP.
+			See: Documentation/devicetree/bindings/reset/reset.txt
+
+- vqmmc-supply:		Phandle to the regulator dt node, mentioned as the vcc/vdd
+			supply in eMMC/SD specs.
+
+- sd-uhs--sdr50:	To enable the SDR50 in the mmcss.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-sdr104:	To enable the SDR104 in the mmcss.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-ddr50:		To enable the DDR50 in the mmcss.
+			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
 
 Example:
 
+/* Example stih416e eMMC configuration */
+
 mmc0: sdhci@fe81e000 {
 	compatible	= "st,sdhci";
 	status		= "disabled";
@@ -29,5 +71,43 @@
 	pinctrl-0	= <&pinctrl_mmc0>;
 	clock-names	= "mmc";
 	clocks		= <&clk_s_a1_ls 1>;
-	bus-width       = <8>
+	bus-width	= <8>
+
+/* Example SD stih407 family configuration */
+
+mmc1: sdhci@09080000 {
+	compatible	= "st,sdhci-stih407", "st,sdhci";
+	status		= "disabled";
+	reg		= <0x09080000 0x7ff>;
+	reg-names	= "mmc";
+	interrupts	= <GIC_SPI 90 IRQ_TYPE_NONE>;
+	interrupt-names	= "mmcirq";
+	pinctrl-names	= "default";
+	pinctrl-0	= <&pinctrl_sd1>;
+	clock-names	= "mmc";
+	clocks		= <&clk_s_c0_flexgen CLK_MMC_1>;
+	resets		= <&softreset STIH407_MMC1_SOFTRESET>;
+	bus-width	= <4>;
+};
+
+/* Example eMMC stih407 family configuration */
+
+mmc0: sdhci@09060000 {
+	compatible	= "st,sdhci-stih407", "st,sdhci";
+	status		= "disabled";
+	reg		= <0x09060000 0x7ff>, <0x9061008 0x20>;
+	reg-names	= "mmc", "top-mmc-delay";
+	interrupts	= <GIC_SPI 92 IRQ_TYPE_NONE>;
+	interrupt-names	= "mmcirq";
+	pinctrl-names	= "default";
+	pinctrl-0	= <&pinctrl_mmc0>;
+	clock-names	= "mmc";
+	clocks		= <&clk_s_c0_flexgen CLK_MMC_0>;
+	vqmmc-supply	= <&vmmc_reg>;
+	max-frequency	= <200000000>;
+	bus-width	= <8>;
+	non-removable;
+	sd-uhs-sdr50;
+	sd-uhs-sdr104;
+	sd-uhs-ddr50;
 };
diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
new file mode 100644
index 0000000..f7ce50e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
@@ -0,0 +1,63 @@
+* Broadcom iProc PCIe controller with the platform bus interface
+
+Required properties:
+- compatible: Must be "brcm,iproc-pcie"
+- reg: base address and length of the PCIe controller I/O register space
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map, standard PCI properties to define the
+  mapping of the PCIe interface to interrupt numbers
+- linux,pci-domain: PCI domain ID. Should be unique for each host controller
+- bus-range: PCI bus numbers covered
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- device_type: set to "pci"
+- ranges: ranges for the PCI memory and I/O regions
+
+Optional properties:
+- phys: phandle of the PCIe PHY device
+- phy-names: must be "pcie-phy"
+
+Example:
+	pcie0: pcie@18012000 {
+		compatible = "brcm,iproc-pcie";
+		reg = <0x18012000 0x1000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>;
+
+		linux,pci-domain = <0>;
+
+		bus-range = <0x00 0xff>;
+
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x81000000 0 0	  0x28000000 0 0x00010000
+			  0x82000000 0 0x20000000 0x20000000 0 0x04000000>;
+
+		phys = <&phy 0 5>;
+		phy-names = "pcie-phy";
+	};
+
+	pcie1: pcie@18013000 {
+		compatible = "brcm,iproc-pcie";
+		reg = <0x18013000 0x1000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 0>;
+		interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>;
+
+		linux,pci-domain = <1>;
+
+		bus-range = <0x00 0xff>;
+
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x81000000 0 0	  0x48000000 0 0x00010000
+			  0x82000000 0 0x40000000 0x40000000 0 0x04000000>;
+
+		phys = <&phy 1 6>;
+		phy-names = "pcie-phy";
+	};
diff --git a/Documentation/devicetree/bindings/power/da9150-charger.txt b/Documentation/devicetree/bindings/power/da9150-charger.txt
new file mode 100644
index 0000000..f390666
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/da9150-charger.txt
@@ -0,0 +1,26 @@
+Dialog Semiconductor DA9150 Charger Power Supply bindings
+
+Required properties:
+- compatible: "dlg,da9150-charger" for DA9150 Charger Power Supply
+
+Optional properties:
+- io-channels: List of phandle and IIO specifier pairs
+- io-channel-names: List of channel names used by charger
+      ["CHAN_IBUS", "CHAN_VBUS", "CHAN_TJUNC", "CHAN_VBAT"]
+  (See Documentation/devicetree/bindings/iio/iio-bindings.txt for further info)
+
+
+Example:
+
+	da9150-charger {
+		compatible = "dlg,da9150-charger";
+
+		io-channels = <&gpadc 0>,
+			      <&gpadc 2>,
+			      <&gpadc 8>,
+			      <&gpadc 5>;
+		io-channel-names = "CHAN_IBUS",
+				   "CHAN_VBUS",
+				   "CHAN_TJUNC",
+				   "CHAN_VBAT";
+	};
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
new file mode 100644
index 0000000..1e2546f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
@@ -0,0 +1,23 @@
+Generic SYSCON mapped register poweroff driver
+
+This is a generic poweroff driver using syscon to map the poweroff register.
+The poweroff is generally performed with a write to the poweroff register
+defined by the register map pointed by syscon reference plus the offset
+with the mask defined in the poweroff node.
+
+Required properties:
+- compatible: should contain "syscon-poweroff"
+- regmap: this is phandle to the register map node
+- offset: offset in the register map for the poweroff register (in bytes)
+- mask: the poweroff value written to the poweroff register (32 bit access)
+
+Default will be little endian mode, 32 bit access only.
+
+Examples:
+
+	poweroff {
+	   compatible = "syscon-poweroff";
+	   regmap = <&regmapnode>;
+	   offset = <0x0>;
+	   mask = <0x7a>;
+	};
diff --git a/Documentation/devicetree/bindings/regulator/act8865-regulator.txt b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
index dad6358..e91485d 100644
--- a/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
@@ -2,13 +2,35 @@
 -------------------
 
 Required properties:
-- compatible: "active-semi,act8846" or "active-semi,act8865"
+- compatible: "active-semi,act8846" or "active-semi,act8865" or "active-semi,act8600"
 - reg: I2C slave address
 
 Optional properties:
 - system-power-controller: Telling whether or not this pmic is controlling
   the system power. See Documentation/devicetree/bindings/power/power-controller.txt .
 
+Optional input supply properties:
+- for act8600:
+  - vp1-supply: The input supply for DCDC_REG1
+  - vp2-supply: The input supply for DCDC_REG2
+  - vp3-supply: The input supply for DCDC_REG3
+  - inl-supply: The input supply for LDO_REG5, LDO_REG6, LDO_REG7 and LDO_REG8
+  SUDCDC_REG4, LDO_REG9 and LDO_REG10 do not have separate supplies.
+- for act8846:
+  - vp1-supply: The input supply for REG1
+  - vp2-supply: The input supply for REG2
+  - vp3-supply: The input supply for REG3
+  - vp4-supply: The input supply for REG4
+  - inl1-supply: The input supply for REG5, REG6 and REG7
+  - inl2-supply: The input supply for REG8 and LDO_REG9
+  - inl3-supply: The input supply for REG10, REG11 and REG12
+- for act8865:
+  - vp1-supply: The input supply for DCDC_REG1
+  - vp2-supply: The input supply for DCDC_REG2
+  - vp3-supply: The input supply for DCDC_REG3
+  - inl45-supply: The input supply for LDO_REG1 and LDO_REG2
+  - inl67-supply: The input supply for LDO_REG3 and LDO_REG4
+
 Any standard regulator properties can be used to configure the single regulator.
 
 The valid names for regulators are:
@@ -16,6 +38,9 @@
 	REG1, REG2, REG3, REG4, REG5, REG6, REG7, REG8, REG9, REG10, REG11, REG12
 	- for act8865:
 	DCDC_REG1, DCDC_REG2, DCDC_REG3, LDO_REG1, LDO_REG2, LDO_REG3, LDO_REG4.
+	- for act8600:
+	DCDC_REG1, DCDC_REG2, DCDC_REG3, SUDCDC_REG4, LDO_REG5, LDO_REG6, LDO_REG7,
+	LDO_REG8, LDO_REG9, LDO_REG10,
 
 Example:
 --------
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index aad527b..523341a 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -2,11 +2,21 @@
   (CSPI/eCSPI) for i.MX
 
 Required properties:
-- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
+- compatible :
+  - "fsl,imx1-cspi" for SPI compatible with the one integrated on i.MX1
+  - "fsl,imx21-cspi" for SPI compatible with the one integrated on i.MX21
+  - "fsl,imx27-cspi" for SPI compatible with the one integrated on i.MX27
+  - "fsl,imx31-cspi" for SPI compatible with the one integrated on i.MX31
+  - "fsl,imx35-cspi" for SPI compatible with the one integrated on i.MX35
+  - "fsl,imx51-ecspi" for SPI compatible with the one integrated on i.MX51
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
 - fsl,spi-num-chipselects : Contains the number of the chipselect
 - cs-gpios : Specifies the gpio pins to be used for chipselects.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+	Documentation/devicetree/bindings/clock/clock-bindings.txt
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
 		Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
index e2c88df..5c09077 100644
--- a/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
@@ -33,6 +33,11 @@
 		nodes.  If unspecified, a single SPI device without a chip
 		select can be used.
 
+- dmas:         Two DMA channel specifiers following the convention outlined
+                in bindings/dma/dma.txt
+- dma-names:    Names for the dma channels, if present. There must be at
+                least one channel named "tx" for transmit and named "rx" for
+                receive.
 
 SPI slave nodes must be children of the SPI master node and can contain
 properties described in Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -51,6 +56,9 @@
 		clocks = <&gcc GCC_BLSP2_QUP2_SPI_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
 		clock-names = "core", "iface";
 
+		dmas = <&blsp1_bam 13>, <&blsp1_bam 12>;
+		dma-names = "rx", "tx";
+
 		pinctrl-names = "default";
 		pinctrl-0 = <&spi8_default>;
 
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
index cbbe16e..70af78a 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
@@ -16,6 +16,12 @@
   in big endian mode, otherwise in native mode(same with CPU), for more
   detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
 
+Optional SPI slave node properties:
+- fsl,spi-cs-sck-delay: a delay in nanoseconds between activating chip
+  select and the start of clock signal, at the start of a transfer.
+- fsl,spi-sck-cs-delay: a delay in nanoseconds between stopping the clock
+  signal and deactivating chip select, at the end of a transfer.
+
 Example:
 
 dspi0@4002c000 {
@@ -43,6 +49,8 @@
 		reg = <0>;
 		linux,modalias = "m25p80";
 		modal = "at26df081a";
+		fsl,spi-cs-sck-delay = <100>;
+		fsl,spi-sck-cs-delay = <50>;
 	};
 };
 
diff --git a/Documentation/devicetree/bindings/spi/spi-img-spfi.txt b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
index c7dd50f..e02fbf1 100644
--- a/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
@@ -14,6 +14,7 @@
 - dma-names: Must include the following entries:
   - rx
   - tx
+- cs-gpios: Must specify the GPIOs used for chipselect lines.
 - #address-cells: Must be 1.
 - #size-cells: Must be 0.
 
diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.txt b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
index 467dec4..0c491bd 100644
--- a/Documentation/devicetree/bindings/spi/spi-rockchip.txt
+++ b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
@@ -24,6 +24,9 @@
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
 		Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
+- rx-sample-delay-ns: nanoseconds to delay after the SCLK edge before sampling
+		Rx data (may need to be fine tuned for high capacitance lines).
+		No delay (0) by default.
 
 
 Example:
@@ -33,6 +36,7 @@
 		reg = <0xff110000 0x1000>;
 		dmas = <&pdma1 11>, <&pdma1 12>;
 		dma-names = "tx", "rx";
+		rx-sample-delay-ns = <10>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 		interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
index 43404b1..332e625 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
@@ -4,7 +4,7 @@
 - compatible		: "renesas,thermal-<soctype>", "renesas,rcar-thermal"
 			  as fallback.
 			  Examples with soctypes are:
-			    - "renesas,thermal-r8a73a4" (R-Mobile AP6)
+			    - "renesas,thermal-r8a73a4" (R-Mobile APE6)
 			    - "renesas,thermal-r8a7779" (R-Car H1)
 			    - "renesas,thermal-r8a7790" (R-Car H2)
 			    - "renesas,thermal-r8a7791" (R-Car M2-W)
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index fe80e9a..e872948 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -6,6 +6,10 @@
     Prefix: 'it8603'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8620E
+    Prefix: 'it8620'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * IT8705F
     Prefix: 'it87'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -42,6 +46,10 @@
     Prefix: 'it8772'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8781F
+    Prefix: 'it8781'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * IT8782F
     Prefix: 'it8782'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -50,6 +58,14 @@
     Prefix: 'it8783'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8786E
+    Prefix: 'it8786'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
+  * IT8790E
+    Prefix: 'it8790'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * SiS950   [clone of IT8705F]
     Prefix: 'it87'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -94,9 +110,10 @@
 Description
 -----------
 
-This driver implements support for the IT8603E, IT8623E, IT8705F, IT8712F,
-IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E, IT8771E,
-IT8772E, IT8782F, IT8783E/F, and SiS950 chips.
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
+IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E,
+IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and SiS950
+chips.
 
 These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
 joysticks and other miscellaneous stuff. For hardware monitoring, they
@@ -120,11 +137,11 @@
 have support for 2 additional fans. The additional fans are supported by the
 driver.
 
-The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E, IT8782F, IT8783E/F, and late
-IT8712F and IT8705F also have optional 16-bit tachometer counters for fans 1 to
-3. This is better (no more fan clock divider mess) but not compatible with the
-older chips and revisions. The 16-bit tachometer mode is enabled by the driver
-when one of the above chips is detected.
+The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E, IT8781F, IT8782F, IT8783E/F,
+and late IT8712F and IT8705F also have optional 16-bit tachometer counters
+for fans 1 to 3. This is better (no more fan clock divider mess) but not
+compatible with the older chips and revisions. The 16-bit tachometer mode
+is enabled by the driver when one of the above chips is detected.
 
 The IT8726F is just bit enhanced IT8716F with additional hardware
 for AMD power sequencing. Therefore the chip will appear as IT8716F
@@ -134,8 +151,13 @@
 until a datasheet becomes available (hopefully.)
 
 The IT8603E/IT8623E is a custom design, hardware monitoring part is similar to
-IT8728F. It only supports 16-bit fan mode, the full speed mode of the
-fan is not supported (value 0 of pwmX_enable).
+IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
+of the fan is not supported (value 0 of pwmX_enable).
+
+The IT8620E is another custom design, hardware monitoring part is similar to
+IT8728F. It only supports 16-bit fan mode.
+
+The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
 
 Temperatures are measured in degrees Celsius. An alarm is triggered once
 when the Overtemperature Shutdown limit is crossed.
@@ -156,10 +178,10 @@
 0.016 volt (except IT8603E, IT8721F/IT8758E and IT8728F: 0.012 volt.) The
 battery voltage in8 does not have limit registers.
 
-On the IT8603E, IT8721F/IT8758E, IT8782F, and IT8783E/F, some voltage inputs
-are internal and scaled inside the chip:
+On the IT8603E, IT8721F/IT8758E, IT8781F, IT8782F, and IT8783E/F, some
+voltage inputs are internal and scaled inside the chip:
 * in3 (optional)
-* in7 (optional for IT8782F and IT8783E/F)
+* in7 (optional for IT8781F, IT8782F, and IT8783E/F)
 * in8 (always)
 * in9 (relevant for IT8603E only)
 The driver handles this transparently so user-space doesn't have to care.
diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42
index f3893f7..f7f1830a 100644
--- a/Documentation/hwmon/jc42
+++ b/Documentation/hwmon/jc42
@@ -11,12 +11,10 @@
 	http://www.atmel.com/Images/doc8711.pdf
 	http://www.atmel.com/Images/Atmel-8852-SEEPROM-AT30TSE002A-Datasheet.pdf
 	http://www.atmel.com/Images/Atmel-8868-DTS-AT30TSE004A-Datasheet.pdf
-  * IDT TSE2002B3, TSE2002GB2, TS3000B3, TS3000GB2
+  * IDT TSE2002B3, TSE2002GB2, TSE2004GB2, TS3000B3, TS3000GB0, TS3000GB2,
+	TS3001GB2
     Datasheets:
-	http://www.idt.com/sites/default/files/documents/IDT_TSE2002B3C_DST_20100512_120303152056.pdf
-	http://www.idt.com/sites/default/files/documents/IDT_TSE2002GB2A1_DST_20111107_120303145914.pdf
-	http://www.idt.com/sites/default/files/documents/IDT_TS3000B3A_DST_20101129_120303152013.pdf
-	http://www.idt.com/sites/default/files/documents/IDT_TS3000GB2A1_DST_20111104_120303151012.pdf
+	Available from IDT web site
   * Maxim MAX6604
     Datasheets:
 	http://datasheets.maxim-ic.com/en/ds/MAX6604.pdf
diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904
new file mode 100644
index 0000000..014f112
--- /dev/null
+++ b/Documentation/hwmon/nct7904
@@ -0,0 +1,60 @@
+Kernel driver nct7904
+====================
+
+Supported chip:
+  * Nuvoton NCT7904D
+    Prefix: nct7904
+    Addresses: I2C 0x2d, 0x2e
+    Datasheet: Publicly available at Nuvoton website
+	http://www.nuvoton.com/
+
+Author: Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>
+
+
+Description
+-----------
+
+The NCT7904D is a hardware monitor supporting up to 20 voltage sensors,
+internal temperature sensor, Intel PECI and AMD SB-TSI CPU temperature
+interface, up to 12 fan tachometer inputs, up to 4 fan control channels
+with SmartFan.
+
+
+Sysfs entries
+-------------
+
+Currently, the driver supports only the following features:
+
+in[1-20]_input		Input voltage measurements (mV)
+
+fan[1-12]_input		Fan tachometer measurements (rpm)
+
+temp1_input		Local temperature (1/1000 degree,
+			0.125 degree resolution)
+
+temp[2-9]_input		CPU temperatures (1/1000 degree,
+			0.125 degree resolution)
+
+fan[1-4]_mode		R/W, 0/1 for manual or SmartFan mode
+			Setting SmartFan mode is supported only if it has been
+			previously configured by BIOS (or configuration EEPROM)
+
+fan[1-4]_pwm		R/O in SmartFan mode, R/W in manual control mode
+
+The driver checks sensor control registers and does not export the sensors
+that are not enabled. Anyway, a sensor that is enabled may actually be not
+connected and thus provide zero readings.
+
+
+Limitations
+-----------
+
+The following features are not supported in current version:
+
+ - SmartFan control
+ - Watchdog
+ - GPIO
+ - external temperature sensors
+ - SMI
+ - min/max values
+ - many other...
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bfcb1a6..01aa47d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1036,7 +1036,7 @@
 			Format: {"off" | "on" | "skip[mbr]"}
 
 	efi=		[EFI]
-			Format: { "old_map", "nochunk", "noruntime" }
+			Format: { "old_map", "nochunk", "noruntime", "debug" }
 			old_map [X86-64]: switch to the old ioremap-based EFI
 			runtime services mapping. 32-bit still uses this one by
 			default.
@@ -1044,6 +1044,7 @@
 			boot stub, as chunking can cause problems with some
 			firmware implementations.
 			noruntime : disable EFI runtime services support
+			debug: enable misc debug output
 
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
index 8afb236c..e51564c 100644
--- a/Documentation/power/regulator/consumer.txt
+++ b/Documentation/power/regulator/consumer.txt
@@ -137,7 +137,7 @@
 Consumer drivers can request a change in their supply regulator operating mode
 by calling :-
 
-int regulator_set_optimum_mode(struct regulator *regulator, int load_uA);
+int regulator_set_load(struct regulator *regulator, int load_uA);
 
 This will cause the core to recalculate the total load on the regulator (based
 on all its consumers) and change operating mode (if necessary and permitted)
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 596b60c..8446f1e 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -204,266 +204,4 @@
 
     *	RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
 
-If all else fails, check out the rtc-test.c driver!
-
-
--------------------- 8< ---------------- 8< -----------------------------
-
-/*
- *      Real Time Clock Driver Test/Example Program
- *
- *      Compile with:
- *		     gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- *      Copyright (C) 1996, Paul Gortmaker.
- *
- *      Released under the GNU General Public License, version 2,
- *      included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-
-int main(int argc, char **argv)
-{
-	int i, fd, retval, irqcount = 0;
-	unsigned long tmp, data;
-	struct rtc_time rtc_tm;
-	const char *rtc = default_rtc;
-
-	switch (argc) {
-	case 2:
-		rtc = argv[1];
-		/* FALLTHROUGH */
-	case 1:
-		break;
-	default:
-		fprintf(stderr, "usage:  rtctest [rtcdev]\n");
-		return 1;
-	}
-
-	fd = open(rtc, O_RDONLY);
-
-	if (fd ==  -1) {
-		perror(rtc);
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
-	/* Turn on update interrupts (one per second) */
-	retval = ioctl(fd, RTC_UIE_ON, 0);
-	if (retval == -1) {
-		if (errno == ENOTTY) {
-			fprintf(stderr,
-				"\n...Update IRQs not supported.\n");
-			goto test_READ;
-		}
-		perror("RTC_UIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
-			rtc);
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		/* This read will block */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-			perror("read");
-			exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		struct timeval tv = {5, 0};     /* 5 second timeout on select */
-		fd_set readfds;
-
-		FD_ZERO(&readfds);
-		FD_SET(fd, &readfds);
-		/* The select will wait until an RTC interrupt happens. */
-		retval = select(fd+1, &readfds, NULL, NULL, &tv);
-		if (retval == -1) {
-		        perror("select");
-		        exit(errno);
-		}
-		/* This read won't block unlike the select-less case above. */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-		        perror("read");
-		        exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	/* Turn off update interrupts */
-	retval = ioctl(fd, RTC_UIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_UIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_READ:
-	/* Read the RTC time/date */
-	retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
-	if (retval == -1) {
-		perror("RTC_RD_TIME ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
-		rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Set the alarm to 5 sec in the future, and check for rollover */
-	rtc_tm.tm_sec += 5;
-	if (rtc_tm.tm_sec >= 60) {
-		rtc_tm.tm_sec %= 60;
-		rtc_tm.tm_min++;
-	}
-	if (rtc_tm.tm_min == 60) {
-		rtc_tm.tm_min = 0;
-		rtc_tm.tm_hour++;
-	}
-	if (rtc_tm.tm_hour == 24)
-		rtc_tm.tm_hour = 0;
-
-	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
-	if (retval == -1) {
-		if (errno == ENOTTY) {
-			fprintf(stderr,
-				"\n...Alarm IRQs not supported.\n");
-			goto test_PIE;
-		}
-		perror("RTC_ALM_SET ioctl");
-		exit(errno);
-	}
-
-	/* Read the current alarm settings */
-	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
-	if (retval == -1) {
-		perror("RTC_ALM_READ ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Enable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_ON, 0);
-	if (retval == -1) {
-		perror("RTC_AIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Waiting 5 seconds for alarm...");
-	fflush(stderr);
-	/* This blocks until the alarm ring causes an interrupt */
-	retval = read(fd, &data, sizeof(unsigned long));
-	if (retval == -1) {
-		perror("read");
-		exit(errno);
-	}
-	irqcount++;
-	fprintf(stderr, " okay. Alarm rang.\n");
-
-	/* Disable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_AIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_PIE:
-	/* Read periodic IRQ rate */
-	retval = ioctl(fd, RTC_IRQP_READ, &tmp);
-	if (retval == -1) {
-		/* not all RTCs support periodic IRQs */
-		if (errno == ENOTTY) {
-			fprintf(stderr, "\nNo periodic IRQ support\n");
-			goto done;
-		}
-		perror("RTC_IRQP_READ ioctl");
-		exit(errno);
-	}
-	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
-	fprintf(stderr, "Counting 20 interrupts at:");
-	fflush(stderr);
-
-	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
-	for (tmp=2; tmp<=64; tmp*=2) {
-
-		retval = ioctl(fd, RTC_IRQP_SET, tmp);
-		if (retval == -1) {
-			/* not all RTCs can change their periodic IRQ rate */
-			if (errno == ENOTTY) {
-				fprintf(stderr,
-					"\n...Periodic IRQ rate is fixed\n");
-				goto done;
-			}
-			perror("RTC_IRQP_SET ioctl");
-			exit(errno);
-		}
-
-		fprintf(stderr, "\n%ldHz:\t", tmp);
-		fflush(stderr);
-
-		/* Enable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_ON, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_ON ioctl");
-			exit(errno);
-		}
-
-		for (i=1; i<21; i++) {
-			/* This blocks */
-			retval = read(fd, &data, sizeof(unsigned long));
-			if (retval == -1) {
-				perror("read");
-				exit(errno);
-			}
-			fprintf(stderr, " %d",i);
-			fflush(stderr);
-			irqcount++;
-		}
-
-		/* Disable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_OFF, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_OFF ioctl");
-			exit(errno);
-		}
-	}
-
-done:
-	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
-	close(fd);
-
-	return 0;
-}
+If all else fails, check out the tools/testing/selftests/timers/rtctest.c test!
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index 1d508dc..8586eff 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -786,7 +786,6 @@
         irqm:1     same as initial settings (assumed BIOS settings)
         irqm:2     always totem pole
         irqm:0x10  driver will not use IRQF_SHARED flag when requesting irq
-        irqm:0x20  driver will not use IRQF_DISABLED flag when requesting irq
 
     (Bits 0x10 and 0x20 can be combined with hardware irq mode option)
 
@@ -1231,30 +1230,6 @@
 may only be needed under Linux when a scatter/gather list is not used and 
 when the SCSI DATA IN phase is reentered after a phase mismatch.
 
-14.5 IRQ sharing problems
-
-When an IRQ is shared by devices that are handled by different drivers, it 
-may happen that one driver complains about the request of the IRQ having 
-failed. Inder Linux-2.0, this may be due to one driver having requested the 
-IRQ using the IRQF_DISABLED flag but some other having requested the same IRQ
-without this flag. Under both Linux-2.0 and linux-2.2, this may be caused by 
-one driver not having requested the IRQ with the IRQF_SHARED flag.
-
-By default, the ncr53c8xx and sym53c8xx drivers request IRQs with both the 
-IRQF_DISABLED and the IRQF_SHARED flag under Linux-2.0 and with only the IRQF_SHARED
-flag under Linux-2.2.
-
-Under Linux-2.0, you can disable use of IRQF_DISABLED flag from the boot
-command line by using the following option:
-
-     ncr53c8xx=irqm:0x20   (for the generic ncr53c8xx driver)
-     sym53c8xx=irqm:0x20   (for the sym53c8xx driver)
-
-If this does not fix the problem, then you may want to check how all other 
-drivers are requesting the IRQ and report the problem. Note that if at least 
-a single driver does not request the IRQ with the IRQF_SHARED flag (share IRQ),
-then the request of the IRQ obviously will not succeed for all the drivers.
-
 15. SCSI problem troubleshooting
 
 15.1 Problem tracking
diff --git a/Documentation/scsi/tmscsim.txt b/Documentation/scsi/tmscsim.txt
index 0810132..0e0322b 100644
--- a/Documentation/scsi/tmscsim.txt
+++ b/Documentation/scsi/tmscsim.txt
@@ -107,10 +107,6 @@
 MHz PCI bus works for me, though, but I don't recommend using higher clocks
 than the 33.33 MHz being in the PCI spec.
 
-If you want to share the IRQ with another device and the driver refuses to
-do so, you might succeed with changing the DC390_IRQ type in tmscsim.c to 
-IRQF_SHARED | IRQF_DISABLED.
-
 
 3.Features
 ----------
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index d29734b..d1824b3 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -342,12 +342,11 @@
 		.driver = {
 			.name		= "CHIP",
 			.owner		= THIS_MODULE,
+			.pm		= &CHIP_pm_ops,
 		},
 
 		.probe		= CHIP_probe,
 		.remove		= CHIP_remove,
-		.suspend	= CHIP_suspend,
-		.resume		= CHIP_resume,
 	};
 
 The driver core will automatically attempt to bind this driver to any SPI
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
index 3a2f9d5..94f574b 100644
--- a/Documentation/spi/spidev_test.c
+++ b/Documentation/spi/spidev_test.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <getopt.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
@@ -34,24 +35,79 @@
 static uint8_t bits = 8;
 static uint32_t speed = 500000;
 static uint16_t delay;
+static int verbose;
 
-static void transfer(int fd)
+uint8_t default_tx[] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xF0, 0x0D,
+};
+
+uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+char *input_tx;
+
+static void hex_dump(const void *src, size_t length, size_t line_size, char *prefix)
+{
+	int i = 0;
+	const unsigned char *address = src;
+	const unsigned char *line = address;
+	unsigned char c;
+
+	printf("%s | ", prefix);
+	while (length-- > 0) {
+		printf("%02X ", *address++);
+		if (!(++i % line_size) || (length == 0 && i % line_size)) {
+			if (length == 0) {
+				while (i++ % line_size)
+					printf("__ ");
+			}
+			printf(" | ");  /* right close */
+			while (line < address) {
+				c = *line++;
+				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+			}
+			printf("\n");
+			if (length > 0)
+				printf("%s | ", prefix);
+		}
+	}
+}
+
+/*
+ *  Unescape - process hexadecimal escape character
+ *      converts shell input "\x23" -> 0x23
+ */
+int unespcape(char *_dst, char *_src, size_t len)
+{
+	int ret = 0;
+	char *src = _src;
+	char *dst = _dst;
+	unsigned int ch;
+
+	while (*src) {
+		if (*src == '\\' && *(src+1) == 'x') {
+			sscanf(src + 2, "%2x", &ch);
+			src += 4;
+			*dst++ = (unsigned char)ch;
+		} else {
+			*dst++ = *src++;
+		}
+		ret++;
+	}
+	return ret;
+}
+
+static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
 {
 	int ret;
-	uint8_t tx[] = {
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
-		0xF0, 0x0D,
-	};
-	uint8_t rx[ARRAY_SIZE(tx)] = {0, };
+
 	struct spi_ioc_transfer tr = {
 		.tx_buf = (unsigned long)tx,
 		.rx_buf = (unsigned long)rx,
-		.len = ARRAY_SIZE(tx),
+		.len = len,
 		.delay_usecs = delay,
 		.speed_hz = speed,
 		.bits_per_word = bits,
@@ -76,12 +132,9 @@
 	if (ret < 1)
 		pabort("can't send spi message");
 
-	for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
-		if (!(ret % 6))
-			puts("");
-		printf("%.2X ", rx[ret]);
-	}
-	puts("");
+	if (verbose)
+		hex_dump(tx, len, 32, "TX");
+	hex_dump(rx, len, 32, "RX");
 }
 
 static void print_usage(const char *prog)
@@ -97,6 +150,8 @@
 	     "  -L --lsb      least significant bit first\n"
 	     "  -C --cs-high  chip select active high\n"
 	     "  -3 --3wire    SI/SO signals shared\n"
+	     "  -v --verbose  Verbose (show tx buffer)\n"
+	     "  -p            Send data (e.g. \"1234\\xde\\xad\")\n"
 	     "  -N --no-cs    no chip select\n"
 	     "  -R --ready    slave pulls low to pause\n"
 	     "  -2 --dual     dual transfer\n"
@@ -121,12 +176,13 @@
 			{ "no-cs",   0, 0, 'N' },
 			{ "ready",   0, 0, 'R' },
 			{ "dual",    0, 0, '2' },
+			{ "verbose", 0, 0, 'v' },
 			{ "quad",    0, 0, '4' },
 			{ NULL, 0, 0, 0 },
 		};
 		int c;
 
-		c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24", lopts, NULL);
+		c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24p:v", lopts, NULL);
 
 		if (c == -1)
 			break;
@@ -165,9 +221,15 @@
 		case 'N':
 			mode |= SPI_NO_CS;
 			break;
+		case 'v':
+			verbose = 1;
+			break;
 		case 'R':
 			mode |= SPI_READY;
 			break;
+		case 'p':
+			input_tx = optarg;
+			break;
 		case '2':
 			mode |= SPI_TX_DUAL;
 			break;
@@ -191,6 +253,9 @@
 {
 	int ret = 0;
 	int fd;
+	uint8_t *tx;
+	uint8_t *rx;
+	int size;
 
 	parse_opts(argc, argv);
 
@@ -235,7 +300,17 @@
 	printf("bits per word: %d\n", bits);
 	printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
 
-	transfer(fd);
+	if (input_tx) {
+		size = strlen(input_tx+1);
+		tx = malloc(size);
+		rx = malloc(size);
+		size = unespcape((char *)tx, input_tx, size);
+		transfer(fd, tx, rx, size);
+		free(rx);
+		free(tx);
+	} else {
+		transfer(fd, default_tx, default_rx, sizeof(default_tx));
+	}
 
 	close(fd);
 
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b112efc..bc9f6fe 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -997,7 +997,7 @@
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1011,7 +1011,7 @@
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86,arm/arm64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
                                  which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
@@ -1020,7 +1020,7 @@
                                  is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
                                  accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
+ - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390,arm/arm64]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
                                  [s390]
@@ -1031,11 +1031,15 @@
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
 
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1047,6 +1051,10 @@
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
 
@@ -1967,15 +1975,25 @@
   MIPS  | KVM_REG_MIPS_CP0_STATUS       | 32
   MIPS  | KVM_REG_MIPS_CP0_CAUSE        | 32
   MIPS  | KVM_REG_MIPS_CP0_EPC          | 64
+  MIPS  | KVM_REG_MIPS_CP0_PRID         | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG       | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG1      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG2      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG3      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
   MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
   MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
   MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
   MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
+  MIPS  | KVM_REG_MIPS_FPR_32(0..31)    | 32
+  MIPS  | KVM_REG_MIPS_FPR_64(0..31)    | 64
+  MIPS  | KVM_REG_MIPS_VEC_128(0..31)   | 128
+  MIPS  | KVM_REG_MIPS_FCR_IR           | 32
+  MIPS  | KVM_REG_MIPS_FCR_CSR          | 32
+  MIPS  | KVM_REG_MIPS_MSA_IR           | 32
+  MIPS  | KVM_REG_MIPS_MSA_CSR          | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
@@ -2029,6 +2047,25 @@
 MIPS KVM control registers (see above) have the following id bit patterns:
   0x7030 0000 0002 <reg:16>
 
+MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
+id bit patterns depending on the size of the register being accessed. They are
+always accessed according to the current guest FPU mode (Status.FR and
+Config5.FRE), i.e. as the guest would see them, and they become unpredictable
+if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
+registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
+overlap the FPU registers:
+  0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
+  0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
+  0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
+
+MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 01 <0:3> <reg:5>
+
+MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 02 <0:3> <reg:5>
+
 
 4.69 KVM_GET_ONE_REG
 
@@ -2234,7 +2271,7 @@
 4.75 KVM_IRQFD
 
 Capability: KVM_CAP_IRQFD
-Architectures: x86 s390
+Architectures: x86 s390 arm arm64
 Type: vm ioctl
 Parameters: struct kvm_irqfd (in)
 Returns: 0 on success, -1 on error
@@ -2260,6 +2297,10 @@
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
 
+On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
+Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
+given by gsi + 32.
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
@@ -2716,6 +2757,227 @@
    eax, ebx, ecx, edx: the values returned by the cpuid instruction for
          this function/index combination
 
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+         > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VPCU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	__u8 ar;		/* the access register number */
+	__u8 reserved[31];	/* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
+
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+         keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EINVAL: interrupt type is invalid
+          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+          type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
+            than the maximum of VCPUs
+  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+          type is KVM_S390_SIGP_STOP and a stop irq is already pending
+          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+            is already pending
+
+Allows to inject an interrupt to the guest.
+
+Using struct kvm_s390_irq as a parameter allows
+to inject additional payload which is not
+possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+	__u64 type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+		char reserved[64];
+	} u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
+
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: >= number of bytes copied into buffer,
+         -EINVAL if buffer size is 0,
+         -ENOBUFS if buffer size is too small to fit all pending interrupts,
+         -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;
+	__u32 len;
+	__u32 reserved[4];
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+         -EFAULT if the buffer address was invalid,
+         -EINVAL for an invalid buffer length (see below),
+         -EBUSY if there were already interrupts pending,
+         errors occurring when actually injecting the
+          interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 len;
+	__u32 pad;
+};
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts could not be injected for some reason the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3189,6 +3451,31 @@
 This capability enables the in-kernel irqchip for s390. Please refer to
 "4.24 KVM_CREATE_IRQCHIP" for details.
 
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
+done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
+(depending on the current guest FPU register mode), and the Status.FR,
+Config5.FRE bits are accessible via the KVM API and also from the guest,
+depending on them being supported by the FPU.
+
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
@@ -3248,3 +3535,41 @@
 Only privileged operation exceptions will be checked for in the kernel (or even
 in the hardware prior to interception). If this capability is not enabled, the
 old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with z13 processor, and
+provides for the synchronization between host and user space.  Will
+return -EINVAL if the machine does not support vectors.
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in s390_stsi field of
+vcpu->run:
+struct {
+	__u64 addr;
+	__u8 ar;
+	__u8 reserved;
+	__u8 fc;
+	__u8 sel1;
+	__u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc   - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar   - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 4ceef53..d1ad9d5 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -27,6 +27,9 @@
     Copies all floating interrupts into a buffer provided by userspace.
     When the buffer is too small it returns -ENOMEM, which is the indication
     for userspace to try again with a bigger buffer.
+    -ENOBUFS is returned when the allocation of a kernelspace buffer has
+    failed.
+    -EFAULT is returned when copying data to userspace failed.
     All interrupts remain pending, i.e. are not deleted from the list of
     currently pending interrupts.
     attr->addr contains the userspace address of the buffer into which all
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index a75e3ad..88b8589 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -406,6 +406,12 @@
 	- If 0, the protected-mode code is loaded at 0x10000.
 	- If 1, the protected-mode code is loaded at 0x100000.
 
+  Bit 1 (kernel internal): ALSR_FLAG
+	- Used internally by the compressed kernel to communicate
+	  KASLR status to kernel proper.
+	  If 1, KASLR enabled.
+	  If 0, KASLR disabled.
+
   Bit 5 (write): QUIET_FLAG
 	- If 0, print early messages.
 	- If 1, suppress early messages.
diff --git a/MAINTAINERS b/MAINTAINERS
index 728a80f..45825b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3139,12 +3139,15 @@
 F:	Documentation/hwmon/da90??
 F:	drivers/gpio/gpio-da90??.c
 F:	drivers/hwmon/da90??-hwmon.c
+F:	drivers/iio/adc/da91??-*.c
 F:	drivers/input/misc/da90??_onkey.c
 F:	drivers/input/touchscreen/da9052_tsi.c
 F:	drivers/leds/leds-da90??.c
 F:	drivers/mfd/da903x.c
 F:	drivers/mfd/da90??-*.c
+F:	drivers/mfd/da91??-*.c
 F:	drivers/power/da9052-battery.c
+F:	drivers/power/da91??-*.c
 F:	drivers/regulator/da903x.c
 F:	drivers/regulator/da9???-regulator.[ch]
 F:	drivers/rtc/rtc-da90??.c
@@ -3154,6 +3157,7 @@
 F:	include/linux/mfd/da9052/
 F:	include/linux/mfd/da9055/
 F:	include/linux/mfd/da9063/
+F:	include/linux/mfd/da9150/
 F:	include/sound/da[79]*.h
 F:	sound/soc/codecs/da[79]*.[ch]
 
@@ -5593,6 +5597,8 @@
 F:	Documentation/*/kvm*.txt
 F:	Documentation/virtual/kvm/
 F:	arch/*/kvm/
+F:	arch/x86/kernel/kvm.c
+F:	arch/x86/kernel/kvmclock.c
 F:	arch/*/include/asm/kvm*
 F:	include/linux/kvm*
 F:	include/uapi/linux/kvm*
@@ -6566,10 +6572,8 @@
 F:	include/linux/mfd/
 
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
-M:	Chris Ball <chris@printf.net>
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-mmc@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
 T:	git git://git.linaro.org/people/ulf.hansson/mmc.git
 S:	Maintained
 F:	drivers/mmc/
@@ -8561,6 +8565,7 @@
 F:	kernel/time/clocksource.c
 F:	kernel/time/time*.c
 F:	kernel/time/ntp.c
+F:	tools/testing/selftests/timers/
 
 SC1200 WDT DRIVER
 M:	Zwane Mwaikambo <zwanem@gmail.com>
@@ -8669,10 +8674,8 @@
 F:	drivers/mmc/host/sdricoh_cs.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-M:	Chris Ball <chris@printf.net>
 L:	linux-mmc@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
-S:	Maintained
+S:	Orphan
 F:	drivers/mmc/host/sdhci.*
 F:	drivers/mmc/host/sdhci-pltfm.[ch]
 
@@ -8688,18 +8691,12 @@
 K:	\bsecure_computing
 K:	\bTIF_SECCOMP\b
 
-SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
-M:	Anton Vorontsov <anton@enomsg.org>
-L:	linuxppc-dev@lists.ozlabs.org
-L:	linux-mmc@vger.kernel.org
-S:	Maintained
-F:	drivers/mmc/host/sdhci-pltfm.[ch]
-
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
 M:	Ben Dooks <ben-linux@fluff.org>
+M:	Jaehoon Chung <jh80.chung@samsung.com>
 L:	linux-mmc@vger.kernel.org
 S:	Maintained
-F:	drivers/mmc/host/sdhci-s3c.c
+F:	drivers/mmc/host/sdhci-s3c*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER
 M:	Viresh Kumar <viresh.linux@gmail.com>
diff --git a/Makefile b/Makefile
index 54430f9..9b76ce1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 0
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
@@ -779,6 +779,7 @@
 # check for 'asm goto'
 ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
 	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
 include $(srctree)/scripts/Makefile.kasan
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 98a1525..82f738e 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -338,6 +338,8 @@
 
 		bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops,
 					hose, &resources);
+		if (!bus)
+			continue;
 		hose->bus = bus;
 		hose->need_domain_info = need_domain_info;
 		next_busno = bus->busn_res.end + 1;
@@ -353,6 +355,11 @@
 
 	pci_assign_unassigned_resources();
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
+	for (hose = hose_head; hose; hose = hose->next) {
+		bus = hose->bus;
+		if (bus)
+			pci_bus_add_devices(bus);
+	}
 }
 
 
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index c8d284d..f535a3f 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -116,7 +116,7 @@
 }
 
 static int
-alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
 {
 	int retval = 0;
 	int real_seconds, real_minutes, cmos_minutes;
@@ -211,7 +211,7 @@
 static const struct rtc_class_ops alpha_rtc_ops = {
 	.read_time = alpha_rtc_read_time,
 	.set_time = alpha_rtc_set_time,
-	.set_mmss = alpha_rtc_set_mmss,
+	.set_mmss64 = alpha_rtc_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 
@@ -276,7 +276,7 @@
 }
 
 static int
-remote_set_mmss(struct device *dev, unsigned long now)
+remote_set_mmss(struct device *dev, time64_t now)
 {
 	union remote_data x;
 	if (smp_processor_id() != boot_cpuid) {
@@ -290,7 +290,7 @@
 static const struct rtc_class_ops remote_rtc_ops = {
 	.read_time = remote_read_time,
 	.set_time = remote_set_time,
-	.set_mmss = remote_set_mmss,
+	.set_mmss64 = remote_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 #endif
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 837c0fa..700686d 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -207,6 +207,9 @@
 
 	/* Scan our single hose.  */
 	bus = pci_scan_bus(0, alpha_mv.pci_ops, hose);
+	if (!bus)
+		return;
+
 	hose->bus = bus;
 	pcibios_claim_one_bus(bus);
 
@@ -253,6 +256,7 @@
 	   for the root bus, so just clear it. */
 	bus->self = NULL;
 	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 1943fc3..8a099bc1 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -15,7 +15,7 @@
 
 / {
 	compatible = "ti,am4372", "ti,am43";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 
 	aliases {
@@ -48,6 +48,15 @@
 		#interrupt-cells = <3>;
 		reg = <0x48241000 0x1000>,
 		      <0x48240100 0x0100>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	l2-cache-controller@48242000 {
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index f84d971..26956cb 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -352,7 +352,6 @@
 		reg = <0x24>;
 		compatible = "ti,tps65218";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 832d243..8ae29c9 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -392,7 +392,6 @@
 	tps@24 {
 		compatible = "ti,tps65218";
 		reg = <0x24>;
-		interrupt-parent = <&gic>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 257c099..1d71091 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -369,7 +369,6 @@
 		reg = <0x24>;
 		compatible = "ti,tps65218";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 6463f9e..bd48dba 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -454,7 +454,6 @@
 	mcp_rtc: rtc@6f {
 		compatible = "microchip,mcp7941x";
 		reg = <0x6f>;
-		interrupt-parent = <&gic>;
 		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>;  /* IRQ_SYS_1N */
 
 		pinctrl-names = "default";
@@ -477,7 +476,7 @@
 
 &uart3 {
 	status = "okay";
-	interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
+	interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
 			      <&dra7_pmx_core 0x248>;
 
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 7563d7c..b1bd06c 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -444,7 +444,7 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart1_pins>;
-	interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
+	interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
 			      <&dra7_pmx_core 0x3e0>;
 };
 
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c4659a9..a0afce7 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -13,14 +13,13 @@
 #include "skeleton.dtsi"
 
 #define MAX_SOURCES 400
-#define DIRECT_IRQ(irq) (MAX_SOURCES + irq)
 
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
 
 	compatible = "ti,dra7xx";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&crossbar_mpu>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -50,18 +49,27 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 
 	gic: interrupt-controller@48211000 {
 		compatible = "arm,cortex-a15-gic";
 		interrupt-controller;
 		#interrupt-cells = <3>;
-		arm,routable-irqs = <192>;
 		reg = <0x48211000 0x1000>,
 		      <0x48212000 0x1000>,
 		      <0x48214000 0x2000>,
 		      <0x48216000 0x2000>;
 		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -91,8 +99,8 @@
 		ti,hwmods = "l3_main_1", "l3_main_2";
 		reg = <0x44000000 0x1000000>,
 		      <0x45000000 0x1000>;
-		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI DIRECT_IRQ(10) IRQ_TYPE_LEVEL_HIGH>;
+		interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+				      <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
 
 		prm: prm@4ae06000 {
 			compatible = "ti,dra7-prm";
@@ -344,7 +352,7 @@
 		uart1: serial@4806a000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -355,7 +363,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -366,7 +374,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -377,7 +385,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
                         status = "disabled";
@@ -388,7 +396,7 @@
 		uart5: serial@48066000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -399,7 +407,7 @@
 		uart6: serial@48068000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -410,7 +418,7 @@
 		uart7: serial@48420000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48420000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart7";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -419,7 +427,7 @@
 		uart8: serial@48422000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48422000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart8";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -428,7 +436,7 @@
 		uart9: serial@48424000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48424000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart9";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -437,7 +445,7 @@
 		uart10: serial@4ae2b000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4ae2b000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart10";
 			clock-frequency = <48000000>;
 			status = "disabled";
@@ -1335,9 +1343,12 @@
 			status = "disabled";
 		};
 
-		crossbar_mpu: crossbar@4a020000 {
+		crossbar_mpu: crossbar@4a002a48 {
 			compatible = "ti,irq-crossbar";
 			reg = <0x4a002a48 0x130>;
+			interrupt-controller;
+			interrupt-parent = <&wakeupgen>;
+			#interrupt-cells = <3>;
 			ti,max-irqs = <160>;
 			ti,max-crossbar-sources = <MAX_SOURCES>;
 			ti,reg-size = <2>;
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index 40ed539..daf2811 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -158,7 +158,6 @@
 		pinctrl-0 = <&tps65917_pins_default>;
 
 		interrupts = <GIC_SPI 2 IRQ_TYPE_NONE>;  /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 
diff --git a/arch/arm/boot/dts/dra72x.dtsi b/arch/arm/boot/dts/dra72x.dtsi
index e5a3d23..f7fb0d0 100644
--- a/arch/arm/boot/dts/dra72x.dtsi
+++ b/arch/arm/boot/dts/dra72x.dtsi
@@ -25,6 +25,7 @@
 
 	pmu {
 		compatible = "arm,cortex-a15-pmu";
-		interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-parent = <&wakeupgen>;
+		interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
 	};
 };
diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi
index 10173fa..00eeed7 100644
--- a/arch/arm/boot/dts/dra74x.dtsi
+++ b/arch/arm/boot/dts/dra74x.dtsi
@@ -41,8 +41,9 @@
 
 	pmu {
 		compatible = "arm,cortex-a15-pmu";
-		interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI DIRECT_IRQ(132) IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-parent = <&wakeupgen>;
+		interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>;
 	};
 
 	ocp {
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index ac6b0ae..14ab515 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -131,6 +131,9 @@
 		pmu_system_controller: system-controller@10020000 {
 			compatible = "samsung,exynos3250-pmu", "syscon";
 			reg = <0x10020000 0x4000>;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
 		};
 
 		mipi_phy: video-phy@10020710 {
@@ -185,6 +188,7 @@
 			compatible = "samsung,exynos3250-rtc";
 			reg = <0x10070000 0x100>;
 			interrupts = <0 73 0>, <0 74 0>;
+			interrupt-parent = <&pmu_system_controller>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index 77ea547..e20cdc2 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -154,6 +154,9 @@
 	pmu_system_controller: system-controller@10020000 {
 		compatible = "samsung,exynos4210-pmu", "syscon";
 		reg = <0x10020000 0x4000>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	dsi_0: dsi@11C80000 {
@@ -266,6 +269,7 @@
 	rtc@10070000 {
 		compatible = "samsung,s3c6410-rtc";
 		reg = <0x10070000 0x100>;
+		interrupt-parent = <&pmu_system_controller>;
 		interrupts = <0 44 0>, <0 45 0>;
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index adbde1a..77f656e 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -205,6 +205,9 @@
 		clock-names = "clkout16";
 		clocks = <&clock CLK_FIN_PLL>;
 		#clock-cells = <1>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	sysreg_system_controller: syscon@10050000 {
@@ -241,6 +244,7 @@
 	rtc: rtc@101E0000 {
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
+		interrupt-parent = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index c0e98cf..b3d2d53 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -327,6 +327,7 @@
 	rtc: rtc@101E0000 {
 		clocks = <&clock CLK_RTC>;
 		clock-names = "rtc";
+		interrupt-parent = <&pmu_system_controller>;
 		status = "disabled";
 	};
 
@@ -770,6 +771,9 @@
 		clock-names = "clkout16";
 		clocks = <&clock CLK_FIN_PLL>;
 		#clock-cells = <1>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	sysreg_system_controller: syscon@10050000 {
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index e860ccd..f2a94fa 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -173,14 +173,12 @@
 	twl: twl@48 {
 		reg = <0x48>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;		/* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
 		compatible = "ti,twl6040";
 		reg = <0x4b>;
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;		/* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio6 0 GPIO_ACTIVE_HIGH>;		/* gpio_160 */
 
 		vio-supply = <&v1v8>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 1505135..7c15fb2 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -372,7 +372,6 @@
 		reg = <0x48>;
 		/* IRQ# = 7 */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -384,7 +383,6 @@
 
 		/* IRQ# = 119 */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio4 31 GPIO_ACTIVE_HIGH>;  /* gpio line 127 */
 
 		vio-supply = <&v1v8>;
@@ -479,17 +477,17 @@
 };
 
 &uart2 {
-	interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART2_RX>;
 };
 
 &uart3 {
-	interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART3_RX>;
 };
 
 &uart4 {
-	interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART4_RX>;
 };
 
diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts
index 3e1da43..8aca8da 100644
--- a/arch/arm/boot/dts/omap4-sdp.dts
+++ b/arch/arm/boot/dts/omap4-sdp.dts
@@ -363,7 +363,6 @@
 		reg = <0x48>;
 		/* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -375,7 +374,6 @@
 
 		/* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio4 31 0>;  /* gpio line 127 */
 
 		vio-supply = <&v1v8>;
@@ -570,21 +568,21 @@
 };
 
 &uart2 {
-	interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART2_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart2_pins>;
 };
 
 &uart3 {
-	interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART3_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart3_pins>;
 };
 
 &uart4 {
-	interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
+	interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH
 			       &omap4_pmx_core OMAP4_UART4_RX>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart4_pins>;
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 062701e..a4f1ba2 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -185,7 +185,6 @@
 		reg = <0x48>;
 		/* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */
-		interrupt-parent = <&gic>;
 	};
 
 	twl6040: twl@4b {
@@ -197,7 +196,6 @@
 
 		/* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */
 		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio6 22 0>; /* gpio 182 */
 
 		vio-supply = <&v1v8>;
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 87401d9..f2091d1 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -14,7 +14,7 @@
 
 / {
 	compatible = "ti,omap4430", "ti,omap4";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -56,6 +56,7 @@
 		#interrupt-cells = <3>;
 		reg = <0x48241000 0x1000>,
 		      <0x48240100 0x0100>;
+		interrupt-parent = <&gic>;
 	};
 
 	L2: l2-cache-controller@48242000 {
@@ -70,6 +71,15 @@
 		clocks = <&mpu_periphclk>;
 		reg = <0x48240600 0x20>;
 		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -319,7 +329,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 		};
@@ -327,7 +337,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 		};
@@ -335,7 +345,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 		};
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index b54b271..61ad2ea 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -412,7 +412,6 @@
 	palmas: palmas@48 {
 		compatible = "ti,palmas";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		reg = <0x48>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 159720d..74777a6 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -311,7 +311,6 @@
 	palmas: palmas@48 {
 		compatible = "ti,palmas";
 		interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */
-		interrupt-parent = <&gic>;
 		reg = <0x48>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
@@ -521,7 +520,6 @@
 		pinctrl-0 = <&twl6040_pins>;
 
 		interrupts = <GIC_SPI 119 IRQ_TYPE_NONE>; /* IRQ_SYS_2N cascaded to gic */
-		interrupt-parent = <&gic>;
 		ti,audpwron-gpio = <&gpio5 13 0>;  /* gpio line 141 */
 
 		vio-supply = <&smps7_reg>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 4a485b6..77b5f70d 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -18,7 +18,7 @@
 	#size-cells = <1>;
 
 	compatible = "ti,omap5";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&wakeupgen>;
 
 	aliases {
 		i2c0 = &i2c1;
@@ -79,6 +79,7 @@
 			     <GIC_PPI 14 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 
 	pmu {
@@ -95,6 +96,15 @@
 		      <0x48212000 0x1000>,
 		      <0x48214000 0x2000>,
 		      <0x48216000 0x2000>;
+		interrupt-parent = <&gic>;
+	};
+
+	wakeupgen: interrupt-controller@48281000 {
+		compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		reg = <0x48281000 0x1000>;
+		interrupt-parent = <&gic>;
 	};
 
 	/*
@@ -458,7 +468,7 @@
 		uart1: serial@4806a000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806a000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart1";
 			clock-frequency = <48000000>;
 		};
@@ -466,7 +476,7 @@
 		uart2: serial@4806c000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806c000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart2";
 			clock-frequency = <48000000>;
 		};
@@ -474,7 +484,7 @@
 		uart3: serial@48020000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48020000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart3";
 			clock-frequency = <48000000>;
 		};
@@ -482,7 +492,7 @@
 		uart4: serial@4806e000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x4806e000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart4";
 			clock-frequency = <48000000>;
 		};
@@ -490,7 +500,7 @@
 		uart5: serial@48066000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48066000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart5";
 			clock-frequency = <48000000>;
 		};
@@ -498,7 +508,7 @@
 		uart6: serial@48068000 {
 			compatible = "ti,omap4-uart";
 			reg = <0x48068000 0x100>;
-			interrupts-extended = <&gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "uart6";
 			clock-frequency = <48000000>;
 		};
@@ -883,14 +893,12 @@
 			usbhsohci: ohci@4a064800 {
 				compatible = "ti,ohci-omap3";
 				reg = <0x4a064800 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
 			};
 
 			usbhsehci: ehci@4a064c00 {
 				compatible = "ti,ehci-omap";
 				reg = <0x4a064c00 0x400>;
-				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
 			};
 		};
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 4296b53..f58a3d9 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -8,7 +8,7 @@
 
 / {
 	compatible = "nvidia,tegra114";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&lic>;
 
 	host1x@50000000 {
 		compatible = "nvidia,tegra114-host1x", "simple-bus";
@@ -134,6 +134,19 @@
 		      <0x50046000 0x2000>;
 		interrupts = <GIC_PPI 9
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
+	};
+
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra114-ictlr", "nvidia,tegra30-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>,
+		      <0x60004400 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
 	};
 
 	timer@60005000 {
@@ -766,5 +779,6 @@
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
 			<GIC_PPI 10
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 };
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 4be06c6..db85695 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -10,7 +10,7 @@
 
 / {
 	compatible = "nvidia,tegra124";
-	interrupt-parent = <&gic>;
+	interrupt-parent = <&lic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
 
@@ -173,6 +173,7 @@
 		      <0x0 0x50046000 0x0 0x2000>;
 		interrupts = <GIC_PPI 9
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		interrupt-parent = <&gic>;
 	};
 
 	gpu@0,57000000 {
@@ -190,6 +191,18 @@
 		status = "disabled";
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+		reg = <0x0 0x60004000 0x0 0x100>,
+		      <0x0 0x60004100 0x0 0x100>,
+		      <0x0 0x60004200 0x0 0x100>,
+		      <0x0 0x60004300 0x0 0x100>,
+		      <0x0 0x60004400 0x0 0x100>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&gic>;
+	};
+
 	timer@0,60005000 {
 		compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
 		reg = <0x0 0x60005000 0x0 0x400>;
@@ -955,5 +968,6 @@
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10
 				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&gic>;
 	};
 };
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index e5527f7..adf6b04 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -7,7 +7,7 @@
 
 / {
 	compatible = "nvidia,tegra20";
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&lic>;
 
 	host1x@50000000 {
 		compatible = "nvidia,tegra20-host1x", "simple-bus";
@@ -142,6 +142,7 @@
 
 	timer@50040600 {
 		compatible = "arm,cortex-a9-twd-timer";
+		interrupt-parent = <&intc>;
 		reg = <0x50040600 0x20>;
 		interrupts = <GIC_PPI 13
 			(GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
@@ -154,6 +155,7 @@
 		       0x50040100 0x0100>;
 		interrupt-controller;
 		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
 	};
 
 	cache-controller@50043000 {
@@ -165,6 +167,17 @@
 		cache-level = <2>;
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra20-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
+	};
+
 	timer@60005000 {
 		compatible = "nvidia,tegra20-timer";
 		reg = <0x60005000 0x60>;
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index db4810d..60e205a 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -8,7 +8,7 @@
 
 / {
 	compatible = "nvidia,tegra30";
-	interrupt-parent = <&intc>;
+	interrupt-parent = <&lic>;
 
 	pcie-controller@00003000 {
 		compatible = "nvidia,tegra30-pcie";
@@ -228,6 +228,7 @@
 	timer@50040600 {
 		compatible = "arm,cortex-a9-twd-timer";
 		reg = <0x50040600 0x20>;
+		interrupt-parent = <&intc>;
 		interrupts = <GIC_PPI 13
 			(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
 		clocks = <&tegra_car TEGRA30_CLK_TWD>;
@@ -239,6 +240,7 @@
 		       0x50040100 0x0100>;
 		interrupt-controller;
 		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
 	};
 
 	cache-controller@50043000 {
@@ -250,6 +252,18 @@
 		cache-level = <2>;
 	};
 
+	lic: interrupt-controller@60004000 {
+		compatible = "nvidia,tegra30-ictlr";
+		reg = <0x60004000 0x100>,
+		      <0x60004100 0x50>,
+		      <0x60004200 0x50>,
+		      <0x60004300 0x50>,
+		      <0x60004400 0x50>;
+		interrupt-controller;
+		#interrupt-cells = <3>;
+		interrupt-parent = <&intc>;
+	};
+
 	timer@60005000 {
 		compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
 		reg = <0x60005000 0x400>;
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 6eaddc4..37dc0fe 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -151,8 +151,6 @@
 	unsigned int mpidr, this_cpu, that_cpu;
 	unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
 	struct completion inbound_alive;
-	struct tick_device *tdev;
-	enum clock_event_mode tdev_mode;
 	long volatile *handshake_ptr;
 	int ipi_nr, ret;
 
@@ -219,13 +217,7 @@
 	/* redirect GIC's SGIs to our counterpart */
 	gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
 
-	tdev = tick_get_device(this_cpu);
-	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
-		tdev = NULL;
-	if (tdev) {
-		tdev_mode = tdev->evtdev->mode;
-		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
-	}
+	tick_suspend_local();
 
 	ret = cpu_pm_enter();
 
@@ -251,11 +243,7 @@
 
 	ret = cpu_pm_exit();
 
-	if (tdev) {
-		clockevents_set_mode(tdev->evtdev, tdev_mode);
-		clockevents_program_event(tdev->evtdev,
-					  tdev->evtdev->next_event, 1);
-	}
+	tick_resume_local();
 
 	trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr);
 	local_fiq_enable();
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 70f9b9b..5f337dc 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_ARM_JUMP_LABEL_H
 #define _ASM_ARM_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -27,8 +27,6 @@
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -37,4 +35,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 816db0b..d995821 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -185,6 +185,7 @@
 #define HSR_COND	(0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT	(0x04)
+#define FSC_ACCESS	(0x08)
 #define FSC_PERM	(0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 41008cd..d71607c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -27,6 +27,8 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -165,19 +167,10 @@
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-			      unsigned long end)
-{
-	return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-	return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 							 unsigned long address)
 {
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index 3f83db2..d8e90c8 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,28 +28,6 @@
 	bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-	phys_addr_t	phys_addr;
-	u8		data[8];
-	u32		len;
-	bool		is_write;
-	void		*private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	run->mmio.phys_addr	= mmio->phys_addr;
-	run->mmio.len		= mmio->len;
-	run->mmio.is_write	= mmio->is_write;
-	memcpy(run->mmio.data, mmio->data, mmio->len);
-	run->exit_reason	= KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa);
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 90c12e1..0f79e4d 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -12,8 +12,7 @@
 
 extern void timer_tick(void);
 
-struct timespec;
-typedef void (*clock_access_fn)(struct timespec *);
+typedef void (*clock_access_fn)(struct timespec64 *);
 extern int register_persistent_clock(clock_access_fn read_boot,
 				     clock_access_fn read_persistent);
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 0db25bc..2499867 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -198,6 +198,9 @@
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
 #define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 2d2d608..488eaac 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -190,7 +190,6 @@
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
   DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.fault.hpfar));
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-#ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
   DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
@@ -200,14 +199,11 @@
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
-#ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
   DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
   DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
-#endif
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
-#endif
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
 #endif
   return 0; 
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 0cc7e58..a66e37e 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -76,7 +76,7 @@
 }
 #endif
 
-static void dummy_clock_access(struct timespec *ts)
+static void dummy_clock_access(struct timespec64 *ts)
 {
 	ts->tv_sec = 0;
 	ts->tv_nsec = 0;
@@ -85,12 +85,12 @@
 static clock_access_fn __read_persistent_clock = dummy_clock_access;
 static clock_access_fn __read_boot_clock = dummy_clock_access;;
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	__read_persistent_clock(ts);
 }
 
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
 {
 	__read_boot_clock(ts);
 }
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 338ace7..f1f79d1 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -18,6 +18,7 @@
 
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on MMU && OF
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@
 	select KVM_ARM_HOST
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
-	depends on ARM_VIRT_EXT && ARM_LPAE
+	select MMU_NOTIFIER
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQFD
+	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
-	  Support hosting virtualized guest machines. You will also
-	  need to select one or more of the processor modules below.
+	  Support hosting virtualized guest machines.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
@@ -37,10 +40,7 @@
 	  If unsure, say N.
 
 config KVM_ARM_HOST
-	bool "KVM host support for ARM cpus."
-	depends on KVM
-	depends on MMU
-	select	MMU_NOTIFIER
+	bool
 	---help---
 	  Provides host support for ARM processors.
 
@@ -55,20 +55,4 @@
 	  large, so only choose a reasonable number that you expect to
 	  actually use.
 
-config KVM_ARM_VGIC
-	bool "KVM support for Virtual GIC"
-	depends on KVM_ARM_HOST && OF
-	select HAVE_KVM_IRQCHIP
-	default y
-	---help---
-	  Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-	bool "KVM support for Architected Timers"
-	depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
-	select HAVE_KVM_IRQCHIP
-	default y
-	---help---
-	  Adds support for the Architected Timers in virtual machines
-
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 443b8be..139e46c 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@
 	plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
 CFLAGS_arm.o := -I. $(plus_virt_def)
 CFLAGS_mmu.o := -I.
 
@@ -15,12 +15,12 @@
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 5560f74..6f53645 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,8 +61,6 @@
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
-static bool vgic_present;
-
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -173,8 +171,8 @@
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
-		r = vgic_present;
-		break;
+	case KVM_CAP_IRQFD:
+	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@
 	case KVM_CAP_ARM_PSCI:
 	case KVM_CAP_ARM_PSCI_0_2:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_MP_STATE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	return 0;
+	return kvm_timer_should_fire(vcpu);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL;
+	if (vcpu->arch.pause)
+		mp_state->mp_state = KVM_MP_STATE_STOPPED;
+	else
+		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL;
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_RUNNABLE:
+		vcpu->arch.pause = false;
+		break;
+	case KVM_MP_STATE_STOPPED:
+		vcpu->arch.pause = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 /**
@@ -452,6 +467,11 @@
 	return 0;
 }
 
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+	return vgic_initialized(kvm);
+}
+
 static void vcpu_pause(struct kvm_vcpu *vcpu)
 {
 	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -831,8 +851,6 @@
 
 	switch (dev_id) {
 	case KVM_ARM_DEVICE_VGIC_V2:
-		if (!vgic_present)
-			return -ENXIO;
 		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
@@ -847,10 +865,7 @@
 
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
-		if (vgic_present)
-			return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
-		else
-			return -ENXIO;
+		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
 		struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@
 	if (err)
 		goto out_free_context;
 
-#ifdef CONFIG_KVM_ARM_VGIC
-		vgic_present = true;
-#endif
-
 	/*
 	 * Init HYP architected timer support
 	 */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 384bab6..d503fbb 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,22 +109,6 @@
 	return -EINVAL;
 }
 
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
-	return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
-	return false;
-}
-
-#else
-
 #define NUM_TIMER_REGS 3
 
 static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@
 	return 0;
 }
 
-#endif
-
 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	void __user *uaddr = (void __user *)(long)reg->addr;
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 14d4883..35e4a3a 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -402,7 +402,6 @@
  * Assumes vcpu pointer in vcpu reg
  */
 .macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
 	/* Get VGIC VCTRL base into r2 */
 	ldr	r2, [vcpu, #VCPU_KVM]
 	ldr	r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@
 	subs	r4, r4, #1
 	bne	1b
 2:
-#endif
 .endm
 
 /*
@@ -469,7 +467,6 @@
  * Assumes vcpu pointer in vcpu reg
  */
 .macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
 	/* Get VGIC VCTRL base into r2 */
 	ldr	r2, [vcpu, #VCPU_KVM]
 	ldr	r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@
 	subs	r4, r4, #1
 	bne	1b
 2:
-#endif
 .endm
 
 #define CNTHCTL_PL1PCTEN	(1 << 0)
@@ -515,7 +511,6 @@
  * Clobbers r2-r5
  */
 .macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
 	ldr	r4, [vcpu, #VCPU_KVM]
 	ldr	r2, [r4, #KVM_TIMER_ENABLED]
 	cmp	r2, #0
@@ -537,7 +532,6 @@
 	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
 
 1:
-#endif
 	@ Allow physical timer/counter access for the host
 	mrc	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 	orr	r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@
 	bic	r2, r2, #CNTHCTL_PL1PCEN
 	mcr	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 
-#ifdef CONFIG_KVM_ARM_TIMER
 	ldr	r4, [vcpu, #VCPU_KVM]
 	ldr	r2, [r4, #KVM_TIMER_ENABLED]
 	cmp	r2, #0
@@ -579,7 +572,6 @@
 	and	r2, r2, #3
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 1:
-#endif
 .endm
 
 .equ vmentry,	0
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 5d3bfc0..974b1c6 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -121,12 +121,11 @@
 	return 0;
 }
 
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-		      struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 {
 	unsigned long rt;
-	int len;
-	bool is_write, sign_extend;
+	int access_size;
+	bool sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {
 		/* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@
 		return 1;
 	}
 
-	len = kvm_vcpu_dabt_get_as(vcpu);
-	if (unlikely(len < 0))
-		return len;
+	access_size = kvm_vcpu_dabt_get_as(vcpu);
+	if (unlikely(access_size < 0))
+		return access_size;
 
-	is_write = kvm_vcpu_dabt_iswrite(vcpu);
+	*is_write = kvm_vcpu_dabt_iswrite(vcpu);
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	mmio->is_write = is_write;
-	mmio->phys_addr = fault_ipa;
-	mmio->len = len;
+	*len = access_size;
 	vcpu->arch.mmio_decode.sign_extend = sign_extend;
 	vcpu->arch.mmio_decode.rt = rt;
 
@@ -165,20 +162,20 @@
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa)
 {
-	struct kvm_exit_mmio mmio;
 	unsigned long data;
 	unsigned long rt;
 	int ret;
+	bool is_write;
+	int len;
+	u8 data_buf[8];
 
 	/*
-	 * Prepare MMIO operation. First stash it in a private
-	 * structure that we can use for in-kernel emulation. If the
-	 * kernel can't handle it, copy it into run->mmio and let user
-	 * space do its magic.
+	 * Prepare MMIO operation. First decode the syndrome data we get
+	 * from the CPU. Then try if some in-kernel emulation feels
+	 * responsible, otherwise let user space do its magic.
 	 */
-
 	if (kvm_vcpu_dabt_isvalid(vcpu)) {
-		ret = decode_hsr(vcpu, fault_ipa, &mmio);
+		ret = decode_hsr(vcpu, &is_write, &len);
 		if (ret)
 			return ret;
 	} else {
@@ -188,21 +185,34 @@
 
 	rt = vcpu->arch.mmio_decode.rt;
 
-	if (mmio.is_write) {
-		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
-					       mmio.len);
+	if (is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
-			       fault_ipa, data);
-		mmio_write_buf(mmio.data, mmio.len, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		mmio_write_buf(data_buf, len, data);
+
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				       data_buf);
 	} else {
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
 			       fault_ipa, 0);
+
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+				      data_buf);
 	}
 
-	if (vgic_handle_mmio(vcpu, run, &mmio))
-		return 1;
+	/* Now prepare kvm_run for the potential return to userland. */
+	run->mmio.is_write	= is_write;
+	run->mmio.phys_addr	= fault_ipa;
+	run->mmio.len		= len;
+	memcpy(run->mmio.data, data_buf, len);
 
-	kvm_prepare_mmio(run, &mmio);
+	if (!ret) {
+		/* We handled the access successfully in the kernel. */
+		kvm_handle_mmio_return(vcpu, run);
+		return 1;
+	}
+
+	run->exit_reason	= KVM_EXIT_MMIO;
 	return 0;
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 5656d79..15b050d 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1330,10 +1330,51 @@
 
 out_unlock:
 	spin_unlock(&kvm->mmu_lock);
+	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret;
 }
 
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	pfn_t pfn;
+	bool pfn_valid = false;
+
+	trace_kvm_access_fault(fault_ipa);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+
+	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		goto out;
+
+	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+		*pmd = pmd_mkyoung(*pmd);
+		pfn = pmd_pfn(*pmd);
+		pfn_valid = true;
+		goto out;
+	}
+
+	pte = pte_offset_kernel(pmd, fault_ipa);
+	if (pte_none(*pte))		/* Nothing there either */
+		goto out;
+
+	*pte = pte_mkyoung(*pte);	/* Just a page... */
+	pfn = pte_pfn(*pte);
+	pfn_valid = true;
+out:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (pfn_valid)
+		kvm_set_pfn_accessed(pfn);
+}
+
 /**
  * kvm_handle_guest_abort - handles all 2nd stage aborts
  * @vcpu:	the VCPU pointer
@@ -1364,7 +1405,8 @@
 
 	/* Check the stage-2 fault is trans. fault or write fault */
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+	    fault_status != FSC_ACCESS) {
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1400,6 +1442,12 @@
 	/* Userspace should not be able to register out-of-bounds IPAs */
 	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
 
+	if (fault_status == FSC_ACCESS) {
+		handle_access_fault(vcpu, fault_ipa);
+		ret = 1;
+		goto out_unlock;
+	}
+
 	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
@@ -1408,15 +1456,16 @@
 	return ret;
 }
 
-static void handle_hva_to_gpa(struct kvm *kvm,
-			      unsigned long start,
-			      unsigned long end,
-			      void (*handler)(struct kvm *kvm,
-					      gpa_t gpa, void *data),
-			      void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+			     unsigned long start,
+			     unsigned long end,
+			     int (*handler)(struct kvm *kvm,
+					    gpa_t gpa, void *data),
+			     void *data)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
+	int ret = 0;
 
 	slots = kvm_memslots(kvm);
 
@@ -1440,14 +1489,17 @@
 
 		for (; gfn < gfn_end; ++gfn) {
 			gpa_t gpa = gfn << PAGE_SHIFT;
-			handler(kvm, gpa, data);
+			ret |= handler(kvm, gpa, data);
 		}
 	}
+
+	return ret;
 }
 
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+	return 0;
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1473,7 +1525,7 @@
 	return 0;
 }
 
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	pte_t *pte = (pte_t *)data;
 
@@ -1485,6 +1537,7 @@
 	 * through this calling path.
 	 */
 	stage2_set_pte(kvm, NULL, gpa, pte, 0);
+	return 0;
 }
 
 
@@ -1501,6 +1554,67 @@
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+		if (pmd_young(*pmd)) {
+			*pmd = pmd_mkold(*pmd);
+			return 1;
+		}
+
+		return 0;
+	}
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (pte_none(*pte))
+		return 0;
+
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);	/* Just a page... */
+		return 1;
+	}
+
+	return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pmd = stage2_get_pmd(kvm, NULL, gpa);
+	if (!pmd || pmd_none(*pmd))	/* Nothing there */
+		return 0;
+
+	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
+		return pmd_young(*pmd);
+
+	pte = pte_offset_kernel(pmd, gpa);
+	if (!pte_none(*pte))		/* Just a page... */
+		return pte_young(*pte);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	trace_kvm_age_hva(start, end);
+	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	trace_kvm_test_age_hva(hva);
+	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 6817664..0ec3539 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -68,6 +68,21 @@
 		  __entry->hxfar, __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_access_fault,
+	TP_PROTO(unsigned long ipa),
+	TP_ARGS(ipa),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	ipa		)
+	),
+
+	TP_fast_assign(
+		__entry->ipa		= ipa;
+	),
+
+	TP_printk("IPA: %lx", __entry->ipa)
+);
+
 TRACE_EVENT(kvm_irq_line,
 	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
 	TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -210,6 +225,39 @@
 	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
 );
 
+TRACE_EVENT(kvm_age_hva,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
 TRACE_EVENT(kvm_hvc,
 	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
 	TP_ARGS(vcpu_pc, r0, imm),
diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index 8a275f2..91fe971 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -155,17 +155,13 @@
 static struct pci_bus __init *
 dove_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
-
-	if (nr < num_pcie_ports) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
+	if (nr >= num_pcie_ports) {
 		BUG();
+		return NULL;
 	}
 
-	return bus;
+	return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+				 &sys->resources);
 }
 
 static int __init dove_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index 9e9dfdf..f44c2e0 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -166,16 +166,14 @@
 	exynos_map_io();
 }
 
+/*
+ * Apparently, these SoCs are not able to wake-up from suspend using
+ * the PMU. Too bad. Should they suddenly become capable of such a
+ * feat, the matches below should be moved to suspend.c.
+ */
 static const struct of_device_id exynos_dt_pmu_match[] = {
-	{ .compatible = "samsung,exynos3250-pmu" },
-	{ .compatible = "samsung,exynos4210-pmu" },
-	{ .compatible = "samsung,exynos4212-pmu" },
-	{ .compatible = "samsung,exynos4412-pmu" },
-	{ .compatible = "samsung,exynos4415-pmu" },
-	{ .compatible = "samsung,exynos5250-pmu" },
 	{ .compatible = "samsung,exynos5260-pmu" },
 	{ .compatible = "samsung,exynos5410-pmu" },
-	{ .compatible = "samsung,exynos5420-pmu" },
 	{ /*sentinel*/ },
 };
 
@@ -186,9 +184,6 @@
 	np = of_find_matching_node(NULL, exynos_dt_pmu_match);
 	if (np)
 		pmu_base_addr = of_iomap(np, 0);
-
-	if (!pmu_base_addr)
-		panic("failed to find exynos pmu register\n");
 }
 
 static void __init exynos_init_irq(void)
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 318d127..2146d91 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -18,7 +18,9 @@
 #include <linux/syscore_ops.h>
 #include <linux/cpu_pm.h>
 #include <linux/io.h>
-#include <linux/irqchip/arm-gic.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
 #include <linux/err.h>
 #include <linux/regulator/machine.h>
 
@@ -43,8 +45,8 @@
 #define EXYNOS5420_CPU_STATE	0x28
 
 /**
- * struct exynos_wkup_irq - Exynos GIC to PMU IRQ mapping
- * @hwirq: Hardware IRQ signal of the GIC
+ * struct exynos_wkup_irq - PMU IRQ to mask mapping
+ * @hwirq: Hardware IRQ signal of the PMU
  * @mask: Mask in PMU wake-up mask register
  */
 struct exynos_wkup_irq {
@@ -93,14 +95,14 @@
 };
 
 static const struct exynos_wkup_irq exynos4_wkup_irq[] = {
-	{ 76, BIT(1) }, /* RTC alarm */
-	{ 77, BIT(2) }, /* RTC tick */
+	{ 44, BIT(1) }, /* RTC alarm */
+	{ 45, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
 static const struct exynos_wkup_irq exynos5250_wkup_irq[] = {
-	{ 75, BIT(1) }, /* RTC alarm */
-	{ 76, BIT(2) }, /* RTC tick */
+	{ 43, BIT(1) }, /* RTC alarm */
+	{ 44, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
@@ -167,6 +169,113 @@
 	return -ENOENT;
 }
 
+static struct irq_chip exynos_pmu_chip = {
+	.name			= "PMU",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_wake		= exynos_irq_set_wake,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int exynos_pmu_domain_xlate(struct irq_domain *domain,
+				   struct device_node *controller,
+				   const u32 *intspec,
+				   unsigned int intsize,
+				   unsigned long *out_hwirq,
+				   unsigned int *out_type)
+{
+	if (domain->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
+	return 0;
+}
+
+static int exynos_pmu_domain_alloc(struct irq_domain *domain,
+				   unsigned int virq,
+				   unsigned int nr_irqs, void *data)
+{
+	struct of_phandle_args *args = data;
+	struct of_phandle_args parent_args;
+	irq_hw_number_t hwirq;
+	int i;
+
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &exynos_pmu_chip, NULL);
+
+	parent_args = *args;
+	parent_args.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+}
+
+static struct irq_domain_ops exynos_pmu_domain_ops = {
+	.xlate	= exynos_pmu_domain_xlate,
+	.alloc	= exynos_pmu_domain_alloc,
+	.free	= irq_domain_free_irqs_common,
+};
+
+static int __init exynos_pmu_irq_init(struct device_node *node,
+				      struct device_node *parent)
+{
+	struct irq_domain *parent_domain, *domain;
+
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
+
+	pmu_base_addr = of_iomap(node, 0);
+
+	if (!pmu_base_addr) {
+		pr_err("%s: failed to find exynos pmu register\n",
+		       node->full_name);
+		return -ENOMEM;
+	}
+
+	domain = irq_domain_add_hierarchy(parent_domain, 0, 0,
+					  node, &exynos_pmu_domain_ops,
+					  NULL);
+	if (!domain) {
+		iounmap(pmu_base_addr);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+#define EXYNOS_PMU_IRQ(symbol, name)	OF_DECLARE_2(irqchip, symbol, name, exynos_pmu_irq_init)
+
+EXYNOS_PMU_IRQ(exynos3250_pmu_irq, "samsung,exynos3250-pmu");
+EXYNOS_PMU_IRQ(exynos4210_pmu_irq, "samsung,exynos4210-pmu");
+EXYNOS_PMU_IRQ(exynos4212_pmu_irq, "samsung,exynos4212-pmu");
+EXYNOS_PMU_IRQ(exynos4412_pmu_irq, "samsung,exynos4412-pmu");
+EXYNOS_PMU_IRQ(exynos4415_pmu_irq, "samsung,exynos4415-pmu");
+EXYNOS_PMU_IRQ(exynos5250_pmu_irq, "samsung,exynos5250-pmu");
+EXYNOS_PMU_IRQ(exynos5420_pmu_irq, "samsung,exynos5420-pmu");
+
 static int exynos_cpu_do_idle(void)
 {
 	/* issue the standby signal into the pm unit. */
@@ -615,17 +724,19 @@
 void __init exynos_pm_init(void)
 {
 	const struct of_device_id *match;
+	struct device_node *np;
 	u32 tmp;
 
-	of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match);
-	if (!match) {
+	np = of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match);
+	if (!np) {
 		pr_err("Failed to find PMU node\n");
 		return;
 	}
-	pm_data = (struct exynos_pm_data *) match->data;
 
-	/* Platform-specific GIC callback */
-	gic_arch_extn.irq_set_wake = exynos_irq_set_wake;
+	if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL)))
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+	pm_data = (struct exynos_pm_data *) match->data;
 
 	/* All wakeup disable */
 	tmp = pmu_raw_readl(S5P_WAKEUP_MASK);
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index e8627e0..c8dffce 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -631,6 +631,7 @@
 
 config SOC_VF610
 	bool "Vybrid Family VF610 support"
+	select IRQ_DOMAIN_HIERARCHY
 	select ARM_GIC
 	select PINCTRL_VF610
 	select PL310_ERRATA_769419 if CACHE_L2X0
diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
index 445e553..097ea4c 100644
--- a/arch/arm/mach-mv78xx0/pcie.c
+++ b/arch/arm/mach-mv78xx0/pcie.c
@@ -197,17 +197,13 @@
 static struct pci_bus __init *
 mv78xx0_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
-
-	if (nr < num_pcie_ports) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
+	if (nr >= num_pcie_ports) {
 		BUG();
+		return NULL;
 	}
 
-	return bus;
+	return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+				 &sys->resources);
 }
 
 static int __init mv78xx0_pcie_map_irq(const struct pci_dev *dev, u8 slot,
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index 01e398a..57d4298 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -14,7 +14,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/cpuidle.h>
 #include <asm/proc-fns.h>
@@ -84,7 +84,6 @@
 {
 	struct idle_statedata *cx = state_ptr + index;
 	u32 mpuss_can_lose_context = 0;
-	int cpu_id = smp_processor_id();
 
 	/*
 	 * CPU0 has to wait and stay ON until CPU1 is OFF state.
@@ -112,7 +111,7 @@
 	mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) &&
 				 (cx->mpu_logic_state == PWRDM_POWER_OFF);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+	tick_broadcast_enter();
 
 	/*
 	 * Call idle CPU PM enter notifier chain so that
@@ -169,7 +168,7 @@
 	if (dev->cpu == 0 && mpuss_can_lose_context)
 		cpu_cluster_pm_exit();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+	tick_broadcast_exit();
 
 fail:
 	cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -184,8 +183,7 @@
  */
 static void omap_setup_broadcast_timer(void *arg)
 {
-	int cpu = smp_processor_id();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+	tick_broadcast_enable();
 }
 
 static struct cpuidle_driver omap4_idle_driver = {
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
index dc6e79c..9a8611a 100644
--- a/arch/arm/mach-omap2/hsmmc.c
+++ b/arch/arm/mach-omap2/hsmmc.c
@@ -150,9 +150,13 @@
 static inline void omap_hsmmc_mux(struct omap_hsmmc_platform_data
 				  *mmc_controller, int controller_nr)
 {
-	if (gpio_is_valid(mmc_controller->switch_pin) &&
-	    (mmc_controller->switch_pin < OMAP_MAX_GPIO_LINES))
-		omap_mux_init_gpio(mmc_controller->switch_pin,
+	if (gpio_is_valid(mmc_controller->gpio_cd) &&
+	    (mmc_controller->gpio_cd < OMAP_MAX_GPIO_LINES))
+		omap_mux_init_gpio(mmc_controller->gpio_cd,
+				   OMAP_PIN_INPUT_PULLUP);
+	if (gpio_is_valid(mmc_controller->gpio_cod) &&
+	    (mmc_controller->gpio_cod < OMAP_MAX_GPIO_LINES))
+		omap_mux_init_gpio(mmc_controller->gpio_cod,
 				   OMAP_PIN_INPUT_PULLUP);
 	if (gpio_is_valid(mmc_controller->gpio_wp) &&
 	    (mmc_controller->gpio_wp < OMAP_MAX_GPIO_LINES))
@@ -250,15 +254,20 @@
 	mmc->internal_clock = !c->ext_clock;
 	mmc->reg_offset = 0;
 
-	mmc->switch_pin = c->gpio_cd;
+	if (c->cover_only) {
+		/* detect if mobile phone cover removed */
+		mmc->gpio_cd = -EINVAL;
+		mmc->gpio_cod = c->gpio_cd;
+	} else {
+		/* card detect pin on the mmc socket itself */
+		mmc->gpio_cd = c->gpio_cd;
+		mmc->gpio_cod = -EINVAL;
+	}
 	mmc->gpio_wp = c->gpio_wp;
 
 	mmc->remux = c->remux;
 	mmc->init_card = c->init_card;
 
-	if (c->cover_only)
-		mmc->cover = 1;
-
 	if (c->nonremovable)
 		mmc->nonremovable = 1;
 
@@ -358,7 +367,15 @@
 		if (!mmc_pdata)
 			continue;
 
-		mmc_pdata->switch_pin = c->gpio_cd;
+		if (c->cover_only) {
+			/* detect if mobile phone cover removed */
+			mmc_pdata->gpio_cd = -EINVAL;
+			mmc_pdata->gpio_cod = c->gpio_cd;
+		} else {
+			/* card detect pin on the mmc socket itself */
+			mmc_pdata->gpio_cd = c->gpio_cd;
+			mmc_pdata->gpio_cod = -EINVAL;
+		}
 		mmc_pdata->gpio_wp = c->gpio_wp;
 
 		res = omap_device_register(pdev);
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index f961c46..3b56722 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -20,11 +20,12 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/cpu_pm.h>
-#include <linux/irqchip/arm-gic.h>
 
 #include "omap-wakeupgen.h"
 #include "omap-secure.h"
@@ -78,29 +79,12 @@
 
 static inline int _wakeupgen_get_irq_info(u32 irq, u32 *bit_posn, u8 *reg_index)
 {
-	unsigned int spi_irq;
-
-	/*
-	 * PPIs and SGIs are not supported.
-	 */
-	if (irq < OMAP44XX_IRQ_GIC_START)
-		return -EINVAL;
-
-	/*
-	 * Subtract the GIC offset.
-	 */
-	spi_irq = irq - OMAP44XX_IRQ_GIC_START;
-	if (spi_irq > MAX_IRQS) {
-		pr_err("omap wakeupGen: Invalid IRQ%d\n", irq);
-		return -EINVAL;
-	}
-
 	/*
 	 * Each WakeupGen register controls 32 interrupt.
 	 * i.e. 1 bit per SPI IRQ
 	 */
-	*reg_index = spi_irq >> 5;
-	*bit_posn = spi_irq %= 32;
+	*reg_index = irq >> 5;
+	*bit_posn = irq %= 32;
 
 	return 0;
 }
@@ -141,6 +125,7 @@
 	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
 	_wakeupgen_clear(d->hwirq, irq_target_cpu[d->hwirq]);
 	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+	irq_chip_mask_parent(d);
 }
 
 /*
@@ -153,6 +138,7 @@
 	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
 	_wakeupgen_set(d->hwirq, irq_target_cpu[d->hwirq]);
 	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+	irq_chip_unmask_parent(d);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -400,15 +386,91 @@
 	return omap_secure_apis;
 }
 
+static struct irq_chip wakeupgen_chip = {
+	.name			= "WUGEN",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= wakeupgen_mask,
+	.irq_unmask		= wakeupgen_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int wakeupgen_domain_xlate(struct irq_domain *domain,
+				  struct device_node *controller,
+				  const u32 *intspec,
+				  unsigned int intsize,
+				  unsigned long *out_hwirq,
+				  unsigned int *out_type)
+{
+	if (domain->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
+	return 0;
+}
+
+static int wakeupgen_domain_alloc(struct irq_domain *domain,
+				  unsigned int virq,
+				  unsigned int nr_irqs, void *data)
+{
+	struct of_phandle_args *args = data;
+	struct of_phandle_args parent_args;
+	irq_hw_number_t hwirq;
+	int i;
+
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+	if (hwirq >= MAX_IRQS)
+		return -EINVAL;	/* Can't deal with this */
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &wakeupgen_chip, NULL);
+
+	parent_args = *args;
+	parent_args.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+}
+
+static struct irq_domain_ops wakeupgen_domain_ops = {
+	.xlate	= wakeupgen_domain_xlate,
+	.alloc	= wakeupgen_domain_alloc,
+	.free	= irq_domain_free_irqs_common,
+};
+
 /*
  * Initialise the wakeupgen module.
  */
-int __init omap_wakeupgen_init(void)
+static int __init wakeupgen_init(struct device_node *node,
+				 struct device_node *parent)
 {
+	struct irq_domain *parent_domain, *domain;
 	int i;
 	unsigned int boot_cpu = smp_processor_id();
 	u32 val;
 
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
 	/* Not supported on OMAP4 ES1.0 silicon */
 	if (omap_rev() == OMAP4430_REV_ES1_0) {
 		WARN(1, "WakeupGen: Not supported on OMAP4430 ES1.0\n");
@@ -416,7 +478,7 @@
 	}
 
 	/* Static mapping, never released */
-	wakeupgen_base = ioremap(OMAP_WKUPGEN_BASE, SZ_4K);
+	wakeupgen_base = of_iomap(node, 0);
 	if (WARN_ON(!wakeupgen_base))
 		return -ENOMEM;
 
@@ -429,6 +491,14 @@
 		max_irqs = AM43XX_IRQS;
 	}
 
+	domain = irq_domain_add_hierarchy(parent_domain, 0, max_irqs,
+					  node, &wakeupgen_domain_ops,
+					  NULL);
+	if (!domain) {
+		iounmap(wakeupgen_base);
+		return -ENOMEM;
+	}
+
 	/* Clear all IRQ bitmasks at wakeupGen level */
 	for (i = 0; i < irq_banks; i++) {
 		wakeupgen_writel(0, i, CPU0_ID);
@@ -437,14 +507,6 @@
 	}
 
 	/*
-	 * Override GIC architecture specific functions to add
-	 * OMAP WakeupGen interrupt controller along with GIC
-	 */
-	gic_arch_extn.irq_mask = wakeupgen_mask;
-	gic_arch_extn.irq_unmask = wakeupgen_unmask;
-	gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
-
-	/*
 	 * FIXME: Add support to set_smp_affinity() once the core
 	 * GIC code has necessary hooks in place.
 	 */
@@ -474,3 +536,9 @@
 
 	return 0;
 }
+
+/*
+ * We cannot use the IRQCHIP_DECLARE macro that lives in
+ * drivers/irqchip, so we're forced to roll our own. Not very nice.
+ */
+OF_DECLARE_2(irqchip, ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init);
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.h b/arch/arm/mach-omap2/omap-wakeupgen.h
index b3c8ecc..a3491ad 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.h
+++ b/arch/arm/mach-omap2/omap-wakeupgen.h
@@ -33,7 +33,6 @@
 #define OMAP_TIMESTAMPCYCLELO			0xc08
 #define OMAP_TIMESTAMPCYCLEHI			0xc0c
 
-extern int __init omap_wakeupgen_init(void);
 extern void __iomem *omap_get_wakeupgen_base(void);
 extern int omap_secure_apis_support(void);
 #endif
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index cee0fe1..7bb116a 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -22,7 +22,6 @@
 #include <linux/of_platform.h>
 #include <linux/export.h>
 #include <linux/irqchip/arm-gic.h>
-#include <linux/irqchip/irq-crossbar.h>
 #include <linux/of_address.h>
 #include <linux/reboot.h>
 #include <linux/genalloc.h>
@@ -242,26 +241,26 @@
 }
 omap_early_initcall(omap4_sar_ram_init);
 
-static const struct of_device_id gic_match[] = {
-	{ .compatible = "arm,cortex-a9-gic", },
-	{ .compatible = "arm,cortex-a15-gic", },
+static const struct of_device_id intc_match[] = {
+	{ .compatible = "ti,omap4-wugen-mpu", },
+	{ .compatible = "ti,omap5-wugen-mpu", },
 	{ },
 };
 
-static struct device_node *gic_node;
+static struct device_node *intc_node;
 
 unsigned int omap4_xlate_irq(unsigned int hwirq)
 {
 	struct of_phandle_args irq_data;
 	unsigned int irq;
 
-	if (!gic_node)
-		gic_node = of_find_matching_node(NULL, gic_match);
+	if (!intc_node)
+		intc_node = of_find_matching_node(NULL, intc_match);
 
-	if (WARN_ON(!gic_node))
+	if (WARN_ON(!intc_node))
 		return hwirq;
 
-	irq_data.np = gic_node;
+	irq_data.np = intc_node;
 	irq_data.args_count = 3;
 	irq_data.args[0] = 0;
 	irq_data.args[1] = hwirq - OMAP44XX_IRQ_GIC_START;
@@ -278,6 +277,12 @@
 {
 	struct device_node *np;
 
+	intc_node = of_find_matching_node(NULL, intc_match);
+	if (WARN_ON(!intc_node)) {
+		pr_err("No WUGEN found in DT, system will misbehave.\n");
+		pr_err("UPDATE YOUR DEVICE TREE!\n");
+	}
+
 	/* Extract GIC distributor and TWD bases for OMAP4460 ROM Errata WA */
 	if (!cpu_is_omap446x())
 		goto skip_errata_init;
@@ -291,9 +296,5 @@
 	WARN_ON(!twd_base);
 
 skip_errata_init:
-	omap_wakeupgen_init();
-#ifdef CONFIG_IRQ_CROSSBAR
-	irqcrossbar_init();
-#endif
 	irqchip_init();
 }
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index 87a12d6..b02f394 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -540,37 +540,33 @@
 
 int __init orion5x_pci_sys_setup(int nr, struct pci_sys_data *sys)
 {
-	int ret = 0;
-
 	vga_base = ORION5X_PCIE_MEM_PHYS_BASE;
 
 	if (nr == 0) {
 		orion_pcie_set_local_bus_nr(PCIE_BASE, sys->busnr);
-		ret = pcie_setup(sys);
-	} else if (nr == 1 && !orion5x_pci_disabled) {
-		orion5x_pci_set_bus_nr(sys->busnr);
-		ret = pci_setup(sys);
+		return pcie_setup(sys);
 	}
 
-	return ret;
+	if (nr == 1 && !orion5x_pci_disabled) {
+		orion5x_pci_set_bus_nr(sys->busnr);
+		return pci_setup(sys);
+	}
+
+	return 0;
 }
 
 struct pci_bus __init *orion5x_pci_sys_scan_bus(int nr, struct pci_sys_data *sys)
 {
-	struct pci_bus *bus;
+	if (nr == 0)
+		return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
+					 &sys->resources);
 
-	if (nr == 0) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys,
-					&sys->resources);
-	} else if (nr == 1 && !orion5x_pci_disabled) {
-		bus = pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys,
-					&sys->resources);
-	} else {
-		bus = NULL;
-		BUG();
-	}
+	if (nr == 1 && !orion5x_pci_disabled)
+		return pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys,
+					 &sys->resources);
 
-	return bus;
+	BUG();
+	return NULL;
 }
 
 int __init orion5x_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index a762b23..6dc4f02 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -758,8 +758,10 @@
 	struct power_supply *psy =
 		power_supply_get_by_name(raumfeld_power_supplicants[0]);
 
-	if (psy)
+	if (psy) {
 		power_supply_set_battery_charged(psy);
+		power_supply_put(psy);
+	}
 }
 
 static int raumfeld_power_resume(void)
diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c
index 9e36180..fd63ae6 100644
--- a/arch/arm/mach-shmobile/intc-sh73a0.c
+++ b/arch/arm/mach-shmobile/intc-sh73a0.c
@@ -252,11 +252,6 @@
 	return IRQ_HANDLED;
 }
 
-static int sh73a0_set_wake(struct irq_data *data, unsigned int on)
-{
-	return 0; /* always allow wakeup */
-}
-
 #define PINTER0_PHYS 0xe69000a0
 #define PINTER1_PHYS 0xe69000a4
 #define PINTER0_VIRT IOMEM(0xe69000a0)
@@ -318,8 +313,8 @@
 	void __iomem *gic_cpu_base = IOMEM(0xf0000100);
 	void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE);
 
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE);
 	gic_init(0, 29, gic_dist_base, gic_cpu_base);
-	gic_arch_extn.irq_set_wake = sh73a0_set_wake;
 
 	register_intc_controller(&intcs_desc);
 	register_intc_controller(&intc_pint0_desc);
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 27dceaf9..c03e562 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -713,18 +713,13 @@
 }
 
 #ifdef CONFIG_USE_OF
-static int r8a7779_set_wake(struct irq_data *data, unsigned int on)
-{
-	return 0; /* always allow wakeup */
-}
-
 void __init r8a7779_init_irq_dt(void)
 {
 #ifdef CONFIG_ARCH_SHMOBILE_LEGACY
 	void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000);
 	void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000);
 #endif
-	gic_arch_extn.irq_set_wake = r8a7779_set_wake;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE);
 
 #ifdef CONFIG_ARCH_SHMOBILE_LEGACY
 	gic_init(0, 29, gic_dist_base, gic_cpu_base);
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index f2b586d..155807f 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -15,7 +15,7 @@
  */
 
 #include <asm/firmware.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -44,7 +44,7 @@
 	tegra_set_cpu_in_lp2();
 	cpu_pm_enter();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	call_firmware_op(prepare_idle);
 
@@ -52,7 +52,7 @@
 	if (call_firmware_op(do_idle, 0) == -ENOSYS)
 		cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	cpu_pm_exit();
 	tegra_clear_cpu_in_lp2();
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 4f25a7c..48844ae 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -136,11 +136,11 @@
 	if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready())
 		return false;
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	tegra_idle_lp2_last();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	if (cpu_online(1))
 		tegra20_wake_cpu1_from_reset();
@@ -153,13 +153,13 @@
 					 struct cpuidle_driver *drv,
 					 int index)
 {
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
 
 	tegra20_cpu_clear_resettable();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
index f8815ed..84d809a 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra30.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra30.c
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -76,11 +76,11 @@
 		return false;
 	}
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	tegra_idle_lp2_last();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
@@ -90,13 +90,13 @@
 					struct cpuidle_driver *drv,
 					int index)
 {
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+	tick_broadcast_enter();
 
 	smp_wmb();
 
 	cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+	tick_broadcast_exit();
 
 	return true;
 }
diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h
index ee79808..81dc950 100644
--- a/arch/arm/mach-tegra/iomap.h
+++ b/arch/arm/mach-tegra/iomap.h
@@ -31,21 +31,6 @@
 #define TEGRA_ARM_INT_DIST_BASE		0x50041000
 #define TEGRA_ARM_INT_DIST_SIZE		SZ_4K
 
-#define TEGRA_PRIMARY_ICTLR_BASE	0x60004000
-#define TEGRA_PRIMARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_SECONDARY_ICTLR_BASE	0x60004100
-#define TEGRA_SECONDARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_TERTIARY_ICTLR_BASE	0x60004200
-#define TEGRA_TERTIARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_QUATERNARY_ICTLR_BASE	0x60004300
-#define TEGRA_QUATERNARY_ICTLR_SIZE	SZ_64
-
-#define TEGRA_QUINARY_ICTLR_BASE	0x60004400
-#define TEGRA_QUINARY_ICTLR_SIZE	SZ_64
-
 #define TEGRA_TMR1_BASE			0x60005000
 #define TEGRA_TMR1_SIZE			SZ_8
 
diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
index ab95f53..3b9098d 100644
--- a/arch/arm/mach-tegra/irq.c
+++ b/arch/arm/mach-tegra/irq.c
@@ -30,43 +30,9 @@
 #include "board.h"
 #include "iomap.h"
 
-#define ICTLR_CPU_IEP_VFIQ	0x08
-#define ICTLR_CPU_IEP_FIR	0x14
-#define ICTLR_CPU_IEP_FIR_SET	0x18
-#define ICTLR_CPU_IEP_FIR_CLR	0x1c
-
-#define ICTLR_CPU_IER		0x20
-#define ICTLR_CPU_IER_SET	0x24
-#define ICTLR_CPU_IER_CLR	0x28
-#define ICTLR_CPU_IEP_CLASS	0x2C
-
-#define ICTLR_COP_IER		0x30
-#define ICTLR_COP_IER_SET	0x34
-#define ICTLR_COP_IER_CLR	0x38
-#define ICTLR_COP_IEP_CLASS	0x3c
-
-#define FIRST_LEGACY_IRQ 32
-#define TEGRA_MAX_NUM_ICTLRS	5
-
 #define SGI_MASK 0xFFFF
 
-static int num_ictlrs;
-
-static void __iomem *ictlr_reg_base[] = {
-	IO_ADDRESS(TEGRA_PRIMARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_SECONDARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_TERTIARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_QUATERNARY_ICTLR_BASE),
-	IO_ADDRESS(TEGRA_QUINARY_ICTLR_BASE),
-};
-
 #ifdef CONFIG_PM_SLEEP
-static u32 cop_ier[TEGRA_MAX_NUM_ICTLRS];
-static u32 cop_iep[TEGRA_MAX_NUM_ICTLRS];
-static u32 cpu_ier[TEGRA_MAX_NUM_ICTLRS];
-static u32 cpu_iep[TEGRA_MAX_NUM_ICTLRS];
-
-static u32 ictlr_wake_mask[TEGRA_MAX_NUM_ICTLRS];
 static void __iomem *tegra_gic_cpu_base;
 #endif
 
@@ -83,140 +49,7 @@
 	return false;
 }
 
-static inline void tegra_irq_write_mask(unsigned int irq, unsigned long reg)
-{
-	void __iomem *base;
-	u32 mask;
-
-	BUG_ON(irq < FIRST_LEGACY_IRQ ||
-		irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32);
-
-	base = ictlr_reg_base[(irq - FIRST_LEGACY_IRQ) / 32];
-	mask = BIT((irq - FIRST_LEGACY_IRQ) % 32);
-
-	__raw_writel(mask, base + reg);
-}
-
-static void tegra_mask(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_CLR);
-}
-
-static void tegra_unmask(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_SET);
-}
-
-static void tegra_ack(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR);
-}
-
-static void tegra_eoi(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR);
-}
-
-static int tegra_retrigger(struct irq_data *d)
-{
-	if (d->hwirq < FIRST_LEGACY_IRQ)
-		return 0;
-
-	tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_SET);
-
-	return 1;
-}
-
 #ifdef CONFIG_PM_SLEEP
-static int tegra_set_wake(struct irq_data *d, unsigned int enable)
-{
-	u32 irq = d->hwirq;
-	u32 index, mask;
-
-	if (irq < FIRST_LEGACY_IRQ ||
-		irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32)
-		return -EINVAL;
-
-	index = ((irq - FIRST_LEGACY_IRQ) / 32);
-	mask = BIT((irq - FIRST_LEGACY_IRQ) % 32);
-	if (enable)
-		ictlr_wake_mask[index] |= mask;
-	else
-		ictlr_wake_mask[index] &= ~mask;
-
-	return 0;
-}
-
-static int tegra_legacy_irq_suspend(void)
-{
-	unsigned long flags;
-	int i;
-
-	local_irq_save(flags);
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		/* Save interrupt state */
-		cpu_ier[i] = readl_relaxed(ictlr + ICTLR_CPU_IER);
-		cpu_iep[i] = readl_relaxed(ictlr + ICTLR_CPU_IEP_CLASS);
-		cop_ier[i] = readl_relaxed(ictlr + ICTLR_COP_IER);
-		cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS);
-
-		/* Disable COP interrupts */
-		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
-
-		/* Disable CPU interrupts */
-		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
-
-		/* Enable the wakeup sources of ictlr */
-		writel_relaxed(ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET);
-	}
-	local_irq_restore(flags);
-
-	return 0;
-}
-
-static void tegra_legacy_irq_resume(void)
-{
-	unsigned long flags;
-	int i;
-
-	local_irq_save(flags);
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		writel_relaxed(cpu_iep[i], ictlr + ICTLR_CPU_IEP_CLASS);
-		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
-		writel_relaxed(cpu_ier[i], ictlr + ICTLR_CPU_IER_SET);
-		writel_relaxed(cop_iep[i], ictlr + ICTLR_COP_IEP_CLASS);
-		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
-		writel_relaxed(cop_ier[i], ictlr + ICTLR_COP_IER_SET);
-	}
-	local_irq_restore(flags);
-}
-
-static struct syscore_ops tegra_legacy_irq_syscore_ops = {
-	.suspend = tegra_legacy_irq_suspend,
-	.resume = tegra_legacy_irq_resume,
-};
-
-int tegra_legacy_irq_syscore_init(void)
-{
-	register_syscore_ops(&tegra_legacy_irq_syscore_ops);
-
-	return 0;
-}
-
 static int tegra_gic_notifier(struct notifier_block *self,
 			      unsigned long cmd, void *v)
 {
@@ -251,45 +84,19 @@
 	cpu_pm_register_notifier(&tegra_gic_notifier_block);
 }
 #else
-#define tegra_set_wake NULL
 static void tegra114_gic_cpu_pm_registration(void) { }
 #endif
 
+static const struct of_device_id tegra_ictlr_match[] __initconst = {
+	{ .compatible = "nvidia,tegra20-ictlr" },
+	{ .compatible = "nvidia,tegra30-ictlr" },
+	{ }
+};
+
 void __init tegra_init_irq(void)
 {
-	int i;
-	void __iomem *distbase;
-
-	distbase = IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE);
-	num_ictlrs = readl_relaxed(distbase + GIC_DIST_CTR) & 0x1f;
-
-	if (num_ictlrs > ARRAY_SIZE(ictlr_reg_base)) {
-		WARN(1, "Too many (%d) interrupt controllers found. Maximum is %d.",
-			num_ictlrs, ARRAY_SIZE(ictlr_reg_base));
-		num_ictlrs = ARRAY_SIZE(ictlr_reg_base);
-	}
-
-	for (i = 0; i < num_ictlrs; i++) {
-		void __iomem *ictlr = ictlr_reg_base[i];
-		writel(~0, ictlr + ICTLR_CPU_IER_CLR);
-		writel(0, ictlr + ICTLR_CPU_IEP_CLASS);
-	}
-
-	gic_arch_extn.irq_ack = tegra_ack;
-	gic_arch_extn.irq_eoi = tegra_eoi;
-	gic_arch_extn.irq_mask = tegra_mask;
-	gic_arch_extn.irq_unmask = tegra_unmask;
-	gic_arch_extn.irq_retrigger = tegra_retrigger;
-	gic_arch_extn.irq_set_wake = tegra_set_wake;
-	gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND;
-
-	/*
-	 * Check if there is a devicetree present, since the GIC will be
-	 * initialized elsewhere under DT.
-	 */
-	if (!of_have_populated_dt())
-		gic_init(0, 29, distbase,
-			IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100));
+	if (WARN_ON(!of_find_matching_node(NULL, tegra_ictlr_match)))
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
 
 	tegra114_gic_cpu_pm_registration();
 }
diff --git a/arch/arm/mach-tegra/irq.h b/arch/arm/mach-tegra/irq.h
index bc05ce5..5142649 100644
--- a/arch/arm/mach-tegra/irq.h
+++ b/arch/arm/mach-tegra/irq.h
@@ -19,10 +19,4 @@
 
 bool tegra_pending_sgi(void);
 
-#ifdef CONFIG_PM_SLEEP
-int tegra_legacy_irq_syscore_init(void);
-#else
-static inline int tegra_legacy_irq_syscore_init(void) { return 0; }
-#endif
-
 #endif
diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 914341b..861d884 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -82,7 +82,6 @@
 {
 	tegra_init_irq();
 	irqchip_init();
-	tegra_legacy_irq_syscore_init();
 }
 
 static void __init tegra_dt_init(void)
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index dbb2970..6ced0f68 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -52,7 +52,7 @@
 */
 void __init ux500_init_irq(void)
 {
-	gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND);
 	irqchip_init();
 
 	/*
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index c887196..58ef2a7 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -186,7 +186,7 @@
 
 static void __init zynq_irq_init(void)
 {
-	gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
+	gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND);
 	irqchip_init();
 }
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c274476..e315dfe 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2027,6 +2027,13 @@
 	if (!iommu)
 		return false;
 
+	/*
+	 * currently arm_iommu_create_mapping() takes a max of size_t
+	 * for size param. So check this limit for now.
+	 */
+	if (size > SIZE_MAX)
+		return false;
+
 	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
 	if (IS_ERR(mapping)) {
 		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d70..2438b96 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -44,24 +44,20 @@
 }
 
 /**
- * omap_read_persistent_clock -  Return time from a persistent clock.
+ * omap_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
  * 32k sync timer.  Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
  */
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static cycles_t cycles;
 static unsigned int persistent_mult, persistent_shift;
-static DEFINE_SPINLOCK(read_persistent_clock_lock);
 
-static void omap_read_persistent_clock(struct timespec *ts)
+static void omap_read_persistent_clock64(struct timespec64 *ts)
 {
 	unsigned long long nsecs;
 	cycles_t last_cycles;
-	unsigned long flags;
-
-	spin_lock_irqsave(&read_persistent_clock_lock, flags);
 
 	last_cycles = cycles;
 	cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0;
@@ -69,11 +65,9 @@
 	nsecs = clocksource_cyc2ns(cycles - last_cycles,
 					persistent_mult, persistent_shift);
 
-	timespec_add_ns(&persistent_ts, nsecs);
+	timespec64_add_ns(&persistent_ts, nsecs);
 
 	*ts = persistent_ts;
-
-	spin_unlock_irqrestore(&read_persistent_clock_lock, flags);
 }
 
 /**
@@ -103,7 +97,7 @@
 
 	/*
 	 * 120000 rough estimate from the calculations in
-	 * __clocksource_updatefreq_scale.
+	 * __clocksource_update_freq_scale.
 	 */
 	clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
 			32768, NSEC_PER_SEC, 120000);
@@ -116,7 +110,7 @@
 	}
 
 	sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
-	register_persistent_clock(NULL, omap_read_persistent_clock);
+	register_persistent_clock(NULL, omap_read_persistent_clock64);
 	pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
 
 	return 0;
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 92bbae3..7052245 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -90,6 +90,7 @@
 #define ESR_ELx_FSC		(0x3F)
 #define ESR_ELx_FSC_TYPE	(0x3C)
 #define ESR_ELx_FSC_EXTABT	(0x10)
+#define ESR_ELx_FSC_ACCESS	(0x08)
 #define ESR_ELx_FSC_FAULT	(0x04)
 #define ESR_ELx_FSC_PERM	(0x0C)
 #define ESR_ELx_CV		(UL(1) << 24)
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 076a1c7..c0e5165 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,11 +18,12 @@
  */
 #ifndef __ASM_JUMP_LABEL_H
 #define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/insn.h>
 
-#ifdef __KERNEL__
-
 #define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
 
 static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u64 jump_label_t;
 
 struct jump_entry {
@@ -49,4 +48,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif	/* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 54bb4ba..ac6fafb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -188,6 +188,7 @@
 
 /* For compatibility with fault code shared with 32-bit */
 #define FSC_FAULT	ESR_ELx_FSC_FAULT
+#define FSC_ACCESS	ESR_ELx_FSC_ACCESS
 #define FSC_PERM	ESR_ELx_FSC_PERM
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8ac3c70f..f0f58c9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -28,6 +28,8 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -177,19 +179,10 @@
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-			      unsigned long end)
-{
-	return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-	return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 							 unsigned long address)
 {
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index 9f52beb..889c908 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -31,28 +31,6 @@
 	bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-	phys_addr_t	phys_addr;
-	u8		data[8];
-	u32		len;
-	bool		is_write;
-	void		*private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	run->mmio.phys_addr	= mmio->phys_addr;
-	run->mmio.len		= mmio->len;
-	run->mmio.is_write	= mmio->is_write;
-	memcpy(run->mmio.data, mmio->data, mmio->len);
-	run->exit_reason	= KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa);
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3ef77a4..c154c0b7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -191,6 +191,9 @@
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
 #define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea0..ec37ab3 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@
 void update_vsyscall(struct timekeeper *tk)
 {
 	struct timespec xtime_coarse;
-	u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
 	++vdso_data->tb_seq_count;
 	smp_wmb();
@@ -213,11 +213,11 @@
 	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
 	if (!use_syscall) {
-		vdso_data->cs_cycle_last	= tk->tkr.cycle_last;
+		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->tkr.xtime_nsec;
-		vdso_data->cs_mult		= tk->tkr.mult;
-		vdso_data->cs_shift		= tk->tkr.shift;
+		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->cs_mult		= tk->tkr_mono.mult;
+		vdso_data->cs_shift		= tk->tkr_mono.shift;
 	}
 
 	smp_wmb();
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f5590c8..5105e29 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -18,6 +18,7 @@
 
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on OF
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
@@ -25,10 +26,10 @@
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	select KVM_ARM_VGIC
-	select KVM_ARM_TIMER
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQFD
 	---help---
 	  Support hosting virtualized guest machines.
 
@@ -50,17 +51,4 @@
 	  large, so only choose a reasonable number that you expect to
 	  actually use.
 
-config KVM_ARM_VGIC
-	bool
-	depends on KVM_ARM_HOST && OF
-	select HAVE_KVM_IRQCHIP
-	---help---
-	  Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-	bool
-	depends on KVM_ARM_VGIC
-	---help---
-	  Adds support for the Architected Timers in virtual machines.
-
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 4e6e09e..d5904f8 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
 CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
@@ -11,7 +11,7 @@
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
@@ -19,11 +19,11 @@
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
-kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h
index d232888..0388ece 100644
--- a/arch/avr32/include/asm/elf.h
+++ b/arch/avr32/include/asm/elf.h
@@ -84,7 +84,7 @@
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 
 
 /* This yields a mask that user programs can use to figure out what
diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c
index b073f4d..f211839 100644
--- a/arch/frv/mb93090-mb00/pci-vdk.c
+++ b/arch/frv/mb93090-mb00/pci-vdk.c
@@ -316,6 +316,7 @@
 
 int __init pcibios_init(void)
 {
+	struct pci_bus *bus;
 	struct pci_ops *dir = NULL;
 	LIST_HEAD(resources);
 
@@ -383,12 +384,15 @@
 	printk("PCI: Probing PCI hardware\n");
 	pci_add_resource(&resources, &pci_ioport_resource);
 	pci_add_resource(&resources, &pci_iomem_resource);
-	pci_scan_root_bus(NULL, 0, pci_root_ops, NULL, &resources);
+	bus = pci_scan_root_bus(NULL, 0, pci_root_ops, NULL, &resources);
 
 	pcibios_irq_init();
 	pcibios_fixup_irqs();
 	pcibios_resource_survey();
+	if (!bus)
+		return 0;
 
+	pci_bus_add_devices(bus);
 	return 0;
 }
 
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 0b5ce82..1be65eb 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -271,7 +271,9 @@
  	if (bus == NULL) {
 		kfree(res);
 		kfree(controller);
+		return;
 	}
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index df96792..821de92 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -313,12 +313,16 @@
 	schedule_timeout(msecs_to_jiffies(200));
 
 	rootbus = pci_scan_bus(0, &mcf_pci_ops, NULL);
+	if (!rootbus)
+		return -ENODEV;
+
 	rootbus->resource[0] = &mcf_pci_io;
 	rootbus->resource[1] = &mcf_pci_mem;
 
 	pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq);
 	pci_bus_size_bridges(rootbus);
 	pci_bus_assign_resources(rootbus);
+	pci_bus_add_devices(rootbus);
 	return 0;
 }
 
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1a10a08..ed1643b 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -521,8 +521,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -573,5 +575,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 7859a73..d38822b 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -479,8 +479,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -531,5 +533,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 372593a..c429199 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -501,8 +501,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -553,5 +555,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f3bd35e..9b88037 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -472,8 +472,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 9f9793f..49ae337 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -481,8 +481,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -533,5 +535,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 89f225c..ee143a5 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -503,8 +503,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -555,5 +557,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d3cdb54..c777aa0 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -583,8 +583,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -635,5 +637,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b4c7664..a7628a8 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -472,8 +472,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 0d4a26f..ebaa682 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -472,8 +472,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 5d581c5..2c16853 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -340,7 +340,7 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
-CONFIG_NE2000=m
+CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
@@ -494,8 +494,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -546,5 +548,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index c6b49a4..e3056bf 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -473,8 +473,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index b65785e..73c36b7 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -473,8 +473,10 @@
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -525,5 +527,6 @@
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
diff --git a/arch/m68k/include/asm/mcfqspi.h b/arch/m68k/include/asm/mcfqspi.h
index 7b51416..256da0e 100644
--- a/arch/m68k/include/asm/mcfqspi.h
+++ b/arch/m68k/include/asm/mcfqspi.h
@@ -11,11 +11,6 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
 */
 
 #ifndef mcfqspi_h
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index 931a31f..8520250 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -62,7 +62,7 @@
 
 		r = dev->resource + idx;
 		if (!r->start && r->end) {
-			pr_err(KERN_ERR "PCI: Device %s not available because of resource collisions\n",
+			pr_err("PCI: Device %s not available because of resource collisions\n",
 				pci_name(dev));
 			return -EINVAL;
 		}
diff --git a/arch/m68k/lib/ashldi3.c b/arch/m68k/lib/ashldi3.c
index 7729f33..37234c2 100644
--- a/arch/m68k/lib/ashldi3.c
+++ b/arch/m68k/lib/ashldi3.c
@@ -11,12 +11,7 @@
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/ashrdi3.c b/arch/m68k/lib/ashrdi3.c
index 18ea5f7..1d59345 100644
--- a/arch/m68k/lib/ashrdi3.c
+++ b/arch/m68k/lib/ashrdi3.c
@@ -11,12 +11,7 @@
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/divsi3.S b/arch/m68k/lib/divsi3.S
index ec307b6..2c0ec85 100644
--- a/arch/m68k/lib/divsi3.S
+++ b/arch/m68k/lib/divsi3.S
@@ -19,12 +19,7 @@
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/lshrdi3.c b/arch/m68k/lib/lshrdi3.c
index d06442d..49e1ec8 100644
--- a/arch/m68k/lib/lshrdi3.c
+++ b/arch/m68k/lib/lshrdi3.c
@@ -11,12 +11,7 @@
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
diff --git a/arch/m68k/lib/modsi3.S b/arch/m68k/lib/modsi3.S
index ef38494..1d9e0ef 100644
--- a/arch/m68k/lib/modsi3.S
+++ b/arch/m68k/lib/modsi3.S
@@ -19,12 +19,7 @@
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c
index ee5f0b1..9006d15 100644
--- a/arch/m68k/lib/muldi3.c
+++ b/arch/m68k/lib/muldi3.c
@@ -12,12 +12,7 @@
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #ifdef CONFIG_CPU_HAS_NO_MULDIV64
 
diff --git a/arch/m68k/lib/mulsi3.S b/arch/m68k/lib/mulsi3.S
index ce29ea3..c39ad4e 100644
--- a/arch/m68k/lib/mulsi3.S
+++ b/arch/m68k/lib/mulsi3.S
@@ -19,12 +19,7 @@
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/udivsi3.S b/arch/m68k/lib/udivsi3.S
index c424c4a..35a5446 100644
--- a/arch/m68k/lib/udivsi3.S
+++ b/arch/m68k/lib/udivsi3.S
@@ -19,12 +19,7 @@
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/lib/umodsi3.S b/arch/m68k/lib/umodsi3.S
index 5def5f6..099da51 100644
--- a/arch/m68k/lib/umodsi3.S
+++ b/arch/m68k/lib/umodsi3.S
@@ -19,12 +19,7 @@
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 5403712..bb11dce 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -47,9 +47,8 @@
 	/* Disable all interrupts. Unlike a VIA it looks like we    */
 	/* do this by setting the source's interrupt level to zero. */
 
-	for (i = 0; i <= OSS_NUM_SOURCES; i++) {
+	for (i = 0; i < OSS_NUM_SOURCES; i++)
 		oss->irq_level[i] = 0;
-	}
 }
 
 /*
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 48528fb..ae838ed 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -1382,6 +1382,10 @@
 
 	/* Call common code to handle resource allocation */
 	pcibios_resource_survey();
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		if (hose->bus)
+			pci_bus_add_devices(hose->bus);
+	}
 
 	return 0;
 }
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index cdac7b3..8038647 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -16,38 +16,38 @@
 	.set push
 	SET_HARDFLOAT
 	cfc1	\tmp,  fcr31
-	swc1	$f0,  THREAD_FPR0_LS64(\thread)
-	swc1	$f1,  THREAD_FPR1_LS64(\thread)
-	swc1	$f2,  THREAD_FPR2_LS64(\thread)
-	swc1	$f3,  THREAD_FPR3_LS64(\thread)
-	swc1	$f4,  THREAD_FPR4_LS64(\thread)
-	swc1	$f5,  THREAD_FPR5_LS64(\thread)
-	swc1	$f6,  THREAD_FPR6_LS64(\thread)
-	swc1	$f7,  THREAD_FPR7_LS64(\thread)
-	swc1	$f8,  THREAD_FPR8_LS64(\thread)
-	swc1	$f9,  THREAD_FPR9_LS64(\thread)
-	swc1	$f10, THREAD_FPR10_LS64(\thread)
-	swc1	$f11, THREAD_FPR11_LS64(\thread)
-	swc1	$f12, THREAD_FPR12_LS64(\thread)
-	swc1	$f13, THREAD_FPR13_LS64(\thread)
-	swc1	$f14, THREAD_FPR14_LS64(\thread)
-	swc1	$f15, THREAD_FPR15_LS64(\thread)
-	swc1	$f16, THREAD_FPR16_LS64(\thread)
-	swc1	$f17, THREAD_FPR17_LS64(\thread)
-	swc1	$f18, THREAD_FPR18_LS64(\thread)
-	swc1	$f19, THREAD_FPR19_LS64(\thread)
-	swc1	$f20, THREAD_FPR20_LS64(\thread)
-	swc1	$f21, THREAD_FPR21_LS64(\thread)
-	swc1	$f22, THREAD_FPR22_LS64(\thread)
-	swc1	$f23, THREAD_FPR23_LS64(\thread)
-	swc1	$f24, THREAD_FPR24_LS64(\thread)
-	swc1	$f25, THREAD_FPR25_LS64(\thread)
-	swc1	$f26, THREAD_FPR26_LS64(\thread)
-	swc1	$f27, THREAD_FPR27_LS64(\thread)
-	swc1	$f28, THREAD_FPR28_LS64(\thread)
-	swc1	$f29, THREAD_FPR29_LS64(\thread)
-	swc1	$f30, THREAD_FPR30_LS64(\thread)
-	swc1	$f31, THREAD_FPR31_LS64(\thread)
+	swc1	$f0,  THREAD_FPR0(\thread)
+	swc1	$f1,  THREAD_FPR1(\thread)
+	swc1	$f2,  THREAD_FPR2(\thread)
+	swc1	$f3,  THREAD_FPR3(\thread)
+	swc1	$f4,  THREAD_FPR4(\thread)
+	swc1	$f5,  THREAD_FPR5(\thread)
+	swc1	$f6,  THREAD_FPR6(\thread)
+	swc1	$f7,  THREAD_FPR7(\thread)
+	swc1	$f8,  THREAD_FPR8(\thread)
+	swc1	$f9,  THREAD_FPR9(\thread)
+	swc1	$f10, THREAD_FPR10(\thread)
+	swc1	$f11, THREAD_FPR11(\thread)
+	swc1	$f12, THREAD_FPR12(\thread)
+	swc1	$f13, THREAD_FPR13(\thread)
+	swc1	$f14, THREAD_FPR14(\thread)
+	swc1	$f15, THREAD_FPR15(\thread)
+	swc1	$f16, THREAD_FPR16(\thread)
+	swc1	$f17, THREAD_FPR17(\thread)
+	swc1	$f18, THREAD_FPR18(\thread)
+	swc1	$f19, THREAD_FPR19(\thread)
+	swc1	$f20, THREAD_FPR20(\thread)
+	swc1	$f21, THREAD_FPR21(\thread)
+	swc1	$f22, THREAD_FPR22(\thread)
+	swc1	$f23, THREAD_FPR23(\thread)
+	swc1	$f24, THREAD_FPR24(\thread)
+	swc1	$f25, THREAD_FPR25(\thread)
+	swc1	$f26, THREAD_FPR26(\thread)
+	swc1	$f27, THREAD_FPR27(\thread)
+	swc1	$f28, THREAD_FPR28(\thread)
+	swc1	$f29, THREAD_FPR29(\thread)
+	swc1	$f30, THREAD_FPR30(\thread)
+	swc1	$f31, THREAD_FPR31(\thread)
 	sw	\tmp, THREAD_FCR31(\thread)
 	.set pop
 	.endm
@@ -56,38 +56,38 @@
 	.set push
 	SET_HARDFLOAT
 	lw	\tmp, THREAD_FCR31(\thread)
-	lwc1	$f0,  THREAD_FPR0_LS64(\thread)
-	lwc1	$f1,  THREAD_FPR1_LS64(\thread)
-	lwc1	$f2,  THREAD_FPR2_LS64(\thread)
-	lwc1	$f3,  THREAD_FPR3_LS64(\thread)
-	lwc1	$f4,  THREAD_FPR4_LS64(\thread)
-	lwc1	$f5,  THREAD_FPR5_LS64(\thread)
-	lwc1	$f6,  THREAD_FPR6_LS64(\thread)
-	lwc1	$f7,  THREAD_FPR7_LS64(\thread)
-	lwc1	$f8,  THREAD_FPR8_LS64(\thread)
-	lwc1	$f9,  THREAD_FPR9_LS64(\thread)
-	lwc1	$f10, THREAD_FPR10_LS64(\thread)
-	lwc1	$f11, THREAD_FPR11_LS64(\thread)
-	lwc1	$f12, THREAD_FPR12_LS64(\thread)
-	lwc1	$f13, THREAD_FPR13_LS64(\thread)
-	lwc1	$f14, THREAD_FPR14_LS64(\thread)
-	lwc1	$f15, THREAD_FPR15_LS64(\thread)
-	lwc1	$f16, THREAD_FPR16_LS64(\thread)
-	lwc1	$f17, THREAD_FPR17_LS64(\thread)
-	lwc1	$f18, THREAD_FPR18_LS64(\thread)
-	lwc1	$f19, THREAD_FPR19_LS64(\thread)
-	lwc1	$f20, THREAD_FPR20_LS64(\thread)
-	lwc1	$f21, THREAD_FPR21_LS64(\thread)
-	lwc1	$f22, THREAD_FPR22_LS64(\thread)
-	lwc1	$f23, THREAD_FPR23_LS64(\thread)
-	lwc1	$f24, THREAD_FPR24_LS64(\thread)
-	lwc1	$f25, THREAD_FPR25_LS64(\thread)
-	lwc1	$f26, THREAD_FPR26_LS64(\thread)
-	lwc1	$f27, THREAD_FPR27_LS64(\thread)
-	lwc1	$f28, THREAD_FPR28_LS64(\thread)
-	lwc1	$f29, THREAD_FPR29_LS64(\thread)
-	lwc1	$f30, THREAD_FPR30_LS64(\thread)
-	lwc1	$f31, THREAD_FPR31_LS64(\thread)
+	lwc1	$f0,  THREAD_FPR0(\thread)
+	lwc1	$f1,  THREAD_FPR1(\thread)
+	lwc1	$f2,  THREAD_FPR2(\thread)
+	lwc1	$f3,  THREAD_FPR3(\thread)
+	lwc1	$f4,  THREAD_FPR4(\thread)
+	lwc1	$f5,  THREAD_FPR5(\thread)
+	lwc1	$f6,  THREAD_FPR6(\thread)
+	lwc1	$f7,  THREAD_FPR7(\thread)
+	lwc1	$f8,  THREAD_FPR8(\thread)
+	lwc1	$f9,  THREAD_FPR9(\thread)
+	lwc1	$f10, THREAD_FPR10(\thread)
+	lwc1	$f11, THREAD_FPR11(\thread)
+	lwc1	$f12, THREAD_FPR12(\thread)
+	lwc1	$f13, THREAD_FPR13(\thread)
+	lwc1	$f14, THREAD_FPR14(\thread)
+	lwc1	$f15, THREAD_FPR15(\thread)
+	lwc1	$f16, THREAD_FPR16(\thread)
+	lwc1	$f17, THREAD_FPR17(\thread)
+	lwc1	$f18, THREAD_FPR18(\thread)
+	lwc1	$f19, THREAD_FPR19(\thread)
+	lwc1	$f20, THREAD_FPR20(\thread)
+	lwc1	$f21, THREAD_FPR21(\thread)
+	lwc1	$f22, THREAD_FPR22(\thread)
+	lwc1	$f23, THREAD_FPR23(\thread)
+	lwc1	$f24, THREAD_FPR24(\thread)
+	lwc1	$f25, THREAD_FPR25(\thread)
+	lwc1	$f26, THREAD_FPR26(\thread)
+	lwc1	$f27, THREAD_FPR27(\thread)
+	lwc1	$f28, THREAD_FPR28(\thread)
+	lwc1	$f29, THREAD_FPR29(\thread)
+	lwc1	$f30, THREAD_FPR30(\thread)
+	lwc1	$f31, THREAD_FPR31(\thread)
 	ctc1	\tmp, fcr31
 	.set pop
 	.endm
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 0cae459..6156ac8 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -60,22 +60,22 @@
 	.set	push
 	SET_HARDFLOAT
 	cfc1	\tmp, fcr31
-	sdc1	$f0,  THREAD_FPR0_LS64(\thread)
-	sdc1	$f2,  THREAD_FPR2_LS64(\thread)
-	sdc1	$f4,  THREAD_FPR4_LS64(\thread)
-	sdc1	$f6,  THREAD_FPR6_LS64(\thread)
-	sdc1	$f8,  THREAD_FPR8_LS64(\thread)
-	sdc1	$f10, THREAD_FPR10_LS64(\thread)
-	sdc1	$f12, THREAD_FPR12_LS64(\thread)
-	sdc1	$f14, THREAD_FPR14_LS64(\thread)
-	sdc1	$f16, THREAD_FPR16_LS64(\thread)
-	sdc1	$f18, THREAD_FPR18_LS64(\thread)
-	sdc1	$f20, THREAD_FPR20_LS64(\thread)
-	sdc1	$f22, THREAD_FPR22_LS64(\thread)
-	sdc1	$f24, THREAD_FPR24_LS64(\thread)
-	sdc1	$f26, THREAD_FPR26_LS64(\thread)
-	sdc1	$f28, THREAD_FPR28_LS64(\thread)
-	sdc1	$f30, THREAD_FPR30_LS64(\thread)
+	sdc1	$f0,  THREAD_FPR0(\thread)
+	sdc1	$f2,  THREAD_FPR2(\thread)
+	sdc1	$f4,  THREAD_FPR4(\thread)
+	sdc1	$f6,  THREAD_FPR6(\thread)
+	sdc1	$f8,  THREAD_FPR8(\thread)
+	sdc1	$f10, THREAD_FPR10(\thread)
+	sdc1	$f12, THREAD_FPR12(\thread)
+	sdc1	$f14, THREAD_FPR14(\thread)
+	sdc1	$f16, THREAD_FPR16(\thread)
+	sdc1	$f18, THREAD_FPR18(\thread)
+	sdc1	$f20, THREAD_FPR20(\thread)
+	sdc1	$f22, THREAD_FPR22(\thread)
+	sdc1	$f24, THREAD_FPR24(\thread)
+	sdc1	$f26, THREAD_FPR26(\thread)
+	sdc1	$f28, THREAD_FPR28(\thread)
+	sdc1	$f30, THREAD_FPR30(\thread)
 	sw	\tmp, THREAD_FCR31(\thread)
 	.set	pop
 	.endm
@@ -84,22 +84,22 @@
 	.set	push
 	.set	mips64r2
 	SET_HARDFLOAT
-	sdc1	$f1,  THREAD_FPR1_LS64(\thread)
-	sdc1	$f3,  THREAD_FPR3_LS64(\thread)
-	sdc1	$f5,  THREAD_FPR5_LS64(\thread)
-	sdc1	$f7,  THREAD_FPR7_LS64(\thread)
-	sdc1	$f9,  THREAD_FPR9_LS64(\thread)
-	sdc1	$f11, THREAD_FPR11_LS64(\thread)
-	sdc1	$f13, THREAD_FPR13_LS64(\thread)
-	sdc1	$f15, THREAD_FPR15_LS64(\thread)
-	sdc1	$f17, THREAD_FPR17_LS64(\thread)
-	sdc1	$f19, THREAD_FPR19_LS64(\thread)
-	sdc1	$f21, THREAD_FPR21_LS64(\thread)
-	sdc1	$f23, THREAD_FPR23_LS64(\thread)
-	sdc1	$f25, THREAD_FPR25_LS64(\thread)
-	sdc1	$f27, THREAD_FPR27_LS64(\thread)
-	sdc1	$f29, THREAD_FPR29_LS64(\thread)
-	sdc1	$f31, THREAD_FPR31_LS64(\thread)
+	sdc1	$f1,  THREAD_FPR1(\thread)
+	sdc1	$f3,  THREAD_FPR3(\thread)
+	sdc1	$f5,  THREAD_FPR5(\thread)
+	sdc1	$f7,  THREAD_FPR7(\thread)
+	sdc1	$f9,  THREAD_FPR9(\thread)
+	sdc1	$f11, THREAD_FPR11(\thread)
+	sdc1	$f13, THREAD_FPR13(\thread)
+	sdc1	$f15, THREAD_FPR15(\thread)
+	sdc1	$f17, THREAD_FPR17(\thread)
+	sdc1	$f19, THREAD_FPR19(\thread)
+	sdc1	$f21, THREAD_FPR21(\thread)
+	sdc1	$f23, THREAD_FPR23(\thread)
+	sdc1	$f25, THREAD_FPR25(\thread)
+	sdc1	$f27, THREAD_FPR27(\thread)
+	sdc1	$f29, THREAD_FPR29(\thread)
+	sdc1	$f31, THREAD_FPR31(\thread)
 	.set	pop
 	.endm
 
@@ -118,22 +118,22 @@
 	.set	push
 	SET_HARDFLOAT
 	lw	\tmp, THREAD_FCR31(\thread)
-	ldc1	$f0,  THREAD_FPR0_LS64(\thread)
-	ldc1	$f2,  THREAD_FPR2_LS64(\thread)
-	ldc1	$f4,  THREAD_FPR4_LS64(\thread)
-	ldc1	$f6,  THREAD_FPR6_LS64(\thread)
-	ldc1	$f8,  THREAD_FPR8_LS64(\thread)
-	ldc1	$f10, THREAD_FPR10_LS64(\thread)
-	ldc1	$f12, THREAD_FPR12_LS64(\thread)
-	ldc1	$f14, THREAD_FPR14_LS64(\thread)
-	ldc1	$f16, THREAD_FPR16_LS64(\thread)
-	ldc1	$f18, THREAD_FPR18_LS64(\thread)
-	ldc1	$f20, THREAD_FPR20_LS64(\thread)
-	ldc1	$f22, THREAD_FPR22_LS64(\thread)
-	ldc1	$f24, THREAD_FPR24_LS64(\thread)
-	ldc1	$f26, THREAD_FPR26_LS64(\thread)
-	ldc1	$f28, THREAD_FPR28_LS64(\thread)
-	ldc1	$f30, THREAD_FPR30_LS64(\thread)
+	ldc1	$f0,  THREAD_FPR0(\thread)
+	ldc1	$f2,  THREAD_FPR2(\thread)
+	ldc1	$f4,  THREAD_FPR4(\thread)
+	ldc1	$f6,  THREAD_FPR6(\thread)
+	ldc1	$f8,  THREAD_FPR8(\thread)
+	ldc1	$f10, THREAD_FPR10(\thread)
+	ldc1	$f12, THREAD_FPR12(\thread)
+	ldc1	$f14, THREAD_FPR14(\thread)
+	ldc1	$f16, THREAD_FPR16(\thread)
+	ldc1	$f18, THREAD_FPR18(\thread)
+	ldc1	$f20, THREAD_FPR20(\thread)
+	ldc1	$f22, THREAD_FPR22(\thread)
+	ldc1	$f24, THREAD_FPR24(\thread)
+	ldc1	$f26, THREAD_FPR26(\thread)
+	ldc1	$f28, THREAD_FPR28(\thread)
+	ldc1	$f30, THREAD_FPR30(\thread)
 	ctc1	\tmp, fcr31
 	.endm
 
@@ -141,22 +141,22 @@
 	.set	push
 	.set	mips64r2
 	SET_HARDFLOAT
-	ldc1	$f1,  THREAD_FPR1_LS64(\thread)
-	ldc1	$f3,  THREAD_FPR3_LS64(\thread)
-	ldc1	$f5,  THREAD_FPR5_LS64(\thread)
-	ldc1	$f7,  THREAD_FPR7_LS64(\thread)
-	ldc1	$f9,  THREAD_FPR9_LS64(\thread)
-	ldc1	$f11, THREAD_FPR11_LS64(\thread)
-	ldc1	$f13, THREAD_FPR13_LS64(\thread)
-	ldc1	$f15, THREAD_FPR15_LS64(\thread)
-	ldc1	$f17, THREAD_FPR17_LS64(\thread)
-	ldc1	$f19, THREAD_FPR19_LS64(\thread)
-	ldc1	$f21, THREAD_FPR21_LS64(\thread)
-	ldc1	$f23, THREAD_FPR23_LS64(\thread)
-	ldc1	$f25, THREAD_FPR25_LS64(\thread)
-	ldc1	$f27, THREAD_FPR27_LS64(\thread)
-	ldc1	$f29, THREAD_FPR29_LS64(\thread)
-	ldc1	$f31, THREAD_FPR31_LS64(\thread)
+	ldc1	$f1,  THREAD_FPR1(\thread)
+	ldc1	$f3,  THREAD_FPR3(\thread)
+	ldc1	$f5,  THREAD_FPR5(\thread)
+	ldc1	$f7,  THREAD_FPR7(\thread)
+	ldc1	$f9,  THREAD_FPR9(\thread)
+	ldc1	$f11, THREAD_FPR11(\thread)
+	ldc1	$f13, THREAD_FPR13(\thread)
+	ldc1	$f15, THREAD_FPR15(\thread)
+	ldc1	$f17, THREAD_FPR17(\thread)
+	ldc1	$f19, THREAD_FPR19(\thread)
+	ldc1	$f21, THREAD_FPR21(\thread)
+	ldc1	$f23, THREAD_FPR23(\thread)
+	ldc1	$f25, THREAD_FPR25(\thread)
+	ldc1	$f27, THREAD_FPR27(\thread)
+	ldc1	$f29, THREAD_FPR29(\thread)
+	ldc1	$f31, THREAD_FPR31(\thread)
 	.set	pop
 	.endm
 
@@ -211,6 +211,22 @@
 	.endm
 
 #ifdef TOOLCHAIN_SUPPORTS_MSA
+	.macro	_cfcmsa	rd, cs
+	.set	push
+	.set	mips32r2
+	.set	msa
+	cfcmsa	\rd, $\cs
+	.set	pop
+	.endm
+
+	.macro	_ctcmsa	cd, rs
+	.set	push
+	.set	mips32r2
+	.set	msa
+	ctcmsa	$\cd, \rs
+	.set	pop
+	.endm
+
 	.macro	ld_d	wd, off, base
 	.set	push
 	.set	mips32r2
@@ -227,35 +243,35 @@
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	rd, ws, n
+	.macro	copy_u_w	ws, n
 	.set	push
 	.set	mips32r2
 	.set	msa
-	copy_u.w \rd, $w\ws[\n]
+	copy_u.w $1, $w\ws[\n]
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	rd, ws, n
+	.macro	copy_u_d	ws, n
 	.set	push
 	.set	mips64r2
 	.set	msa
-	copy_u.d \rd, $w\ws[\n]
+	copy_u.d $1, $w\ws[\n]
 	.set	pop
 	.endm
 
-	.macro	insert_w	wd, n, rs
+	.macro	insert_w	wd, n
 	.set	push
 	.set	mips32r2
 	.set	msa
-	insert.w $w\wd[\n], \rs
+	insert.w $w\wd[\n], $1
 	.set	pop
 	.endm
 
-	.macro	insert_d	wd, n, rs
+	.macro	insert_d	wd, n
 	.set	push
 	.set	mips64r2
 	.set	msa
-	insert.d $w\wd[\n], \rs
+	insert.d $w\wd[\n], $1
 	.set	pop
 	.endm
 #else
@@ -283,7 +299,7 @@
 	/*
 	 * Temporary until all toolchains in use include MSA support.
 	 */
-	.macro	cfcmsa	rd, cs
+	.macro	_cfcmsa	rd, cs
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
@@ -293,7 +309,7 @@
 	.set	pop
 	.endm
 
-	.macro	ctcmsa	cd, rs
+	.macro	_ctcmsa	cd, rs
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
@@ -320,44 +336,36 @@
 	.set	pop
 	.endm
 
-	.macro	copy_u_w	rd, ws, n
+	.macro	copy_u_w	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
 	.word	COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
-	/* move triggers an assembler bug... */
-	or	\rd, $1, zero
 	.set	pop
 	.endm
 
-	.macro	copy_u_d	rd, ws, n
+	.macro	copy_u_d	ws, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
 	.insn
 	.word	COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
-	/* move triggers an assembler bug... */
-	or	\rd, $1, zero
 	.set	pop
 	.endm
 
-	.macro	insert_w	wd, n, rs
+	.macro	insert_w	wd, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	/* move triggers an assembler bug... */
-	or	$1, \rs, zero
 	.word	INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
 
-	.macro	insert_d	wd, n, rs
+	.macro	insert_d	wd, n
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	/* move triggers an assembler bug... */
-	or	$1, \rs, zero
 	.word	INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
 	.set	pop
 	.endm
@@ -399,7 +407,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	cfcmsa	$1, MSA_CSR
+	_cfcmsa	$1, MSA_CSR
 	sw	$1, THREAD_MSA_CSR(\thread)
 	.set	pop
 	.endm
@@ -409,7 +417,7 @@
 	.set	noat
 	SET_HARDFLOAT
 	lw	$1, THREAD_MSA_CSR(\thread)
-	ctcmsa	MSA_CSR, $1
+	_ctcmsa	MSA_CSR, $1
 	.set	pop
 	ld_d	0, THREAD_FPR0, \thread
 	ld_d	1, THREAD_FPR1, \thread
@@ -452,9 +460,6 @@
 	insert_w \wd, 2
 	insert_w \wd, 3
 #endif
-	.if	31-\wd
-	msa_init_upper	(\wd+1)
-	.endif
 	.endm
 
 	.macro	msa_init_all_upper
@@ -463,6 +468,37 @@
 	SET_HARDFLOAT
 	not	$1, zero
 	msa_init_upper	0
+	msa_init_upper	1
+	msa_init_upper	2
+	msa_init_upper	3
+	msa_init_upper	4
+	msa_init_upper	5
+	msa_init_upper	6
+	msa_init_upper	7
+	msa_init_upper	8
+	msa_init_upper	9
+	msa_init_upper	10
+	msa_init_upper	11
+	msa_init_upper	12
+	msa_init_upper	13
+	msa_init_upper	14
+	msa_init_upper	15
+	msa_init_upper	16
+	msa_init_upper	17
+	msa_init_upper	18
+	msa_init_upper	19
+	msa_init_upper	20
+	msa_init_upper	21
+	msa_init_upper	22
+	msa_init_upper	23
+	msa_init_upper	24
+	msa_init_upper	25
+	msa_init_upper	26
+	msa_init_upper	27
+	msa_init_upper	28
+	msa_init_upper	29
+	msa_init_upper	30
+	msa_init_upper	31
 	.set	pop
 	.endm
 
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index dd083e9..b104ad9 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -48,6 +48,12 @@
 #define FPU_FR_MASK		0x1
 };
 
+#define __disable_fpu()							\
+do {									\
+	clear_c0_status(ST0_CU1);					\
+	disable_fpu_hazard();						\
+} while (0)
+
 static inline int __enable_fpu(enum fpu_mode mode)
 {
 	int fr;
@@ -86,7 +92,12 @@
 		enable_fpu_hazard();
 
 		/* check FR has the desired value */
-		return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+		if (!!(read_c0_status() & ST0_FR) == !!fr)
+			return 0;
+
+		/* unsupported FR value */
+		__disable_fpu();
+		return SIGFPE;
 
 	default:
 		BUG();
@@ -95,12 +106,6 @@
 	return SIGFPE;
 }
 
-#define __disable_fpu()							\
-do {									\
-	clear_c0_status(ST0_CU1);					\
-	disable_fpu_hazard();						\
-} while (0)
-
 #define clear_fpu_owner()	clear_thread_flag(TIF_USEDFPU)
 
 static inline int __is_fpu_owner(void)
@@ -170,6 +175,7 @@
 		}
 		disable_msa();
 		clear_thread_flag(TIF_USEDMSA);
+		__disable_fpu();
 	} else if (is_fpu_owner()) {
 		if (save)
 			_save_fp(current);
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index fdbff44..608aa57 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -8,9 +8,9 @@
 #ifndef _ASM_MIPS_JUMP_LABEL_H
 #define _ASM_MIPS_JUMP_LABEL_H
 
-#include <linux/types.h>
+#ifndef __ASSEMBLY__
 
-#ifdef __KERNEL__
+#include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
@@ -39,8 +39,6 @@
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_64BIT
 typedef u64 jump_label_t;
 #else
@@ -53,4 +51,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/kdebug.h b/arch/mips/include/asm/kdebug.h
index 6a9af5f..cba22ab 100644
--- a/arch/mips/include/asm/kdebug.h
+++ b/arch/mips/include/asm/kdebug.h
@@ -10,7 +10,8 @@
 	DIE_RI,
 	DIE_PAGE_FAULT,
 	DIE_BREAK,
-	DIE_SSTEPBP
+	DIE_SSTEPBP,
+	DIE_MSAFP
 };
 
 #endif /* _ASM_MIPS_KDEBUG_H */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index ac4fc71..4c25823 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -21,10 +21,10 @@
 
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+	(KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
 
 #define MIPS_CP0_64(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+	(KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
 
 #define KVM_REG_MIPS_CP0_INDEX		MIPS_CP0_32(0, 0)
 #define KVM_REG_MIPS_CP0_ENTRYLO0	MIPS_CP0_64(2, 0)
@@ -42,11 +42,14 @@
 #define KVM_REG_MIPS_CP0_STATUS		MIPS_CP0_32(12, 0)
 #define KVM_REG_MIPS_CP0_CAUSE		MIPS_CP0_32(13, 0)
 #define KVM_REG_MIPS_CP0_EPC		MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_PRID		MIPS_CP0_32(15, 0)
 #define KVM_REG_MIPS_CP0_EBASE		MIPS_CP0_64(15, 1)
 #define KVM_REG_MIPS_CP0_CONFIG		MIPS_CP0_32(16, 0)
 #define KVM_REG_MIPS_CP0_CONFIG1	MIPS_CP0_32(16, 1)
 #define KVM_REG_MIPS_CP0_CONFIG2	MIPS_CP0_32(16, 2)
 #define KVM_REG_MIPS_CP0_CONFIG3	MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG4	MIPS_CP0_32(16, 4)
+#define KVM_REG_MIPS_CP0_CONFIG5	MIPS_CP0_32(16, 5)
 #define KVM_REG_MIPS_CP0_CONFIG7	MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_XCONTEXT	MIPS_CP0_64(20, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC	MIPS_CP0_64(30, 0)
@@ -119,6 +122,10 @@
 	u32 syscall_exits;
 	u32 resvd_inst_exits;
 	u32 break_inst_exits;
+	u32 trap_inst_exits;
+	u32 msa_fpe_exits;
+	u32 fpe_exits;
+	u32 msa_disabled_exits;
 	u32 flush_dcache_exits;
 	u32 halt_successful_poll;
 	u32 halt_wakeup;
@@ -138,6 +145,10 @@
 	SYSCALL_EXITS,
 	RESVD_INST_EXITS,
 	BREAK_INST_EXITS,
+	TRAP_INST_EXITS,
+	MSA_FPE_EXITS,
+	FPE_EXITS,
+	MSA_DISABLED_EXITS,
 	FLUSH_DCACHE_EXITS,
 	MAX_KVM_MIPS_EXIT_TYPES
 };
@@ -206,6 +217,8 @@
 #define MIPS_CP0_CONFIG1_SEL	1
 #define MIPS_CP0_CONFIG2_SEL	2
 #define MIPS_CP0_CONFIG3_SEL	3
+#define MIPS_CP0_CONFIG4_SEL	4
+#define MIPS_CP0_CONFIG5_SEL	5
 
 /* Config0 register bits */
 #define CP0C0_M			31
@@ -262,31 +275,6 @@
 #define CP0C3_SM		1
 #define CP0C3_TL		0
 
-/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0						\
-  ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
-
-/* Have config2, no coprocessor2 attached, no MDMX support attached,
-   no performance counters, watch registers present,
-   no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1						\
-((1 << CP0C1_M) |						\
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |		\
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |		\
- (0 << CP0C1_FP))
-
-/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2						\
-((1 << CP0C2_M))
-
-/* No config4, no DSP ASE, no large physaddr (PABITS),
-   no external interrupt controller, no vectored interrupts,
-   no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3						\
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |	\
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |	\
- (0 << CP0C3_SM) | (0 << CP0C3_TL))
-
 /* MMU types, the first four entries have the same layout as the
    CP0C0_MT field.  */
 enum mips_mmu_types {
@@ -321,7 +309,9 @@
  */
 #define T_TRAP			13	/* Trap instruction */
 #define T_VCEI			14	/* Virtual coherency exception */
+#define T_MSAFPE		14	/* MSA floating point exception */
 #define T_FPE			15	/* Floating point exception */
+#define T_MSADIS		21	/* MSA disabled exception */
 #define T_WATCH			23	/* Watch address reference */
 #define T_VCED			31	/* Virtual coherency data */
 
@@ -374,6 +364,9 @@
 	long tlb_lo1;
 };
 
+#define KVM_MIPS_FPU_FPU	0x1
+#define KVM_MIPS_FPU_MSA	0x2
+
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
 	void *host_ebase, *guest_ebase;
@@ -395,6 +388,8 @@
 
 	/* FPU State */
 	struct mips_fpu_struct fpu;
+	/* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+	unsigned int fpu_inuse;
 
 	/* COP0 State */
 	struct mips_coproc *cop0;
@@ -441,6 +436,9 @@
 
 	/* WAIT executed */
 	int wait;
+
+	u8 fpu_enabled;
+	u8 msa_enabled;
 };
 
 
@@ -482,11 +480,15 @@
 #define kvm_read_c0_guest_config1(cop0)		(cop0->reg[MIPS_CP0_CONFIG][1])
 #define kvm_read_c0_guest_config2(cop0)		(cop0->reg[MIPS_CP0_CONFIG][2])
 #define kvm_read_c0_guest_config3(cop0)		(cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config4(cop0)		(cop0->reg[MIPS_CP0_CONFIG][4])
+#define kvm_read_c0_guest_config5(cop0)		(cop0->reg[MIPS_CP0_CONFIG][5])
 #define kvm_read_c0_guest_config7(cop0)		(cop0->reg[MIPS_CP0_CONFIG][7])
 #define kvm_write_c0_guest_config(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][0] = (val))
 #define kvm_write_c0_guest_config1(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][1] = (val))
 #define kvm_write_c0_guest_config2(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][2] = (val))
 #define kvm_write_c0_guest_config3(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config4(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][4] = (val))
+#define kvm_write_c0_guest_config5(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][5] = (val))
 #define kvm_write_c0_guest_config7(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][7] = (val))
 #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
@@ -567,6 +569,31 @@
 	kvm_set_c0_guest_ebase(cop0, ((val) & (change)));		\
 }
 
+/* Helpers */
+
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+	return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+		vcpu->fpu_enabled;
+}
+
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+	return kvm_mips_guest_can_have_fpu(vcpu) &&
+		kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
+
+static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+{
+	return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) &&
+		vcpu->msa_enabled;
+}
+
+static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+{
+	return kvm_mips_guest_can_have_msa(vcpu) &&
+		kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
+}
 
 struct kvm_mips_callbacks {
 	int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
@@ -578,6 +605,10 @@
 	int (*handle_syscall)(struct kvm_vcpu *vcpu);
 	int (*handle_res_inst)(struct kvm_vcpu *vcpu);
 	int (*handle_break)(struct kvm_vcpu *vcpu);
+	int (*handle_trap)(struct kvm_vcpu *vcpu);
+	int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
+	int (*handle_fpe)(struct kvm_vcpu *vcpu);
+	int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
 	int (*vm_init)(struct kvm *kvm);
 	int (*vcpu_init)(struct kvm_vcpu *vcpu);
 	int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -596,6 +627,8 @@
 			   const struct kvm_one_reg *reg, s64 *v);
 	int (*set_one_reg)(struct kvm_vcpu *vcpu,
 			   const struct kvm_one_reg *reg, s64 v);
+	int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
+	int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -606,6 +639,19 @@
 /* Trampoline ASM routine to start running in "Guest" context */
 extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
+/* FPU/MSA context management */
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void __kvm_save_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_msa(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
 /* TLB handling */
 uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
@@ -711,6 +757,26 @@
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
+extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+						       uint32_t *opc,
+						       struct kvm_run *run,
+						       struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+							 uint32_t *opc,
+							 struct kvm_run *run,
+							 struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+						      uint32_t *opc,
+						      struct kvm_run *run,
+						      struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+							 uint32_t *opc,
+							 struct kvm_run *run,
+							 struct kvm_vcpu *vcpu);
+
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 							 struct kvm_run *run);
 
@@ -749,6 +815,11 @@
 					    struct kvm_run *run,
 					    struct kvm_vcpu *vcpu);
 
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+
 /* Dynamic binary translation */
 extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
 				      struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index b5dcbee..9b3b48e 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -105,7 +105,7 @@
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # define FPR_IDX(width, idx)	(idx)
 #else
-# define FPR_IDX(width, idx)	((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx)	((idx) ^ ((64 / (width)) - 1))
 #endif
 
 #define BUILD_FPR_ACCESS(width) \
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index 2c04b6d..6985eb5 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -36,77 +36,85 @@
 
 /*
  * for KVM_GET_FPU and KVM_SET_FPU
- *
- * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs
- * are zero filled.
  */
 struct kvm_fpu {
-	__u64 fpr[32];
-	__u32 fir;
-	__u32 fccr;
-	__u32 fexr;
-	__u32 fenr;
-	__u32 fcsr;
-	__u32 pad;
 };
 
 
 /*
- * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
  * registers.  The id field is broken down as follows:
  *
- *  bits[2..0]   - Register 'sel' index.
- *  bits[7..3]   - Register 'rd'  index.
- *  bits[15..8]  - Must be zero.
- *  bits[31..16] - 1 -> CP0 registers.
- *  bits[51..32] - Must be zero.
  *  bits[63..52] - As per linux/kvm.h
+ *  bits[51..32] - Must be zero.
+ *  bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ *  bits[15..8]  - Must be zero.
+ *  bits[7..3]   - Register 'rd'  index.
+ *  bits[2..0]   - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
  *
  * Other sets registers may be added in the future.  Each set would
  * have its own identifier in bits[31..16].
- *
- * The registers defined in struct kvm_regs are also accessible, the
- * id values for these are below.
  */
 
-#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0)
-#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1)
-#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2)
-#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3)
-#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4)
-#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5)
-#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6)
-#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7)
-#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8)
-#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9)
-#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10)
-#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11)
-#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12)
-#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13)
-#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14)
-#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15)
-#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16)
-#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17)
-#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18)
-#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19)
-#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20)
-#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21)
-#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22)
-#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23)
-#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24)
-#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25)
-#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26)
-#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27)
-#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28)
-#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29)
-#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30)
-#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31)
+#define KVM_REG_MIPS_GP		(KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0	(KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM	(KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU	(KVM_REG_MIPS | 0x0000000000030000ULL)
 
-#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32)
-#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
-#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
 
-/* KVM specific control registers */
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  0)
+#define KVM_REG_MIPS_R1		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  1)
+#define KVM_REG_MIPS_R2		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  2)
+#define KVM_REG_MIPS_R3		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  3)
+#define KVM_REG_MIPS_R4		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  4)
+#define KVM_REG_MIPS_R5		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  5)
+#define KVM_REG_MIPS_R6		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  6)
+#define KVM_REG_MIPS_R7		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  7)
+#define KVM_REG_MIPS_R8		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  8)
+#define KVM_REG_MIPS_R9		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  9)
+#define KVM_REG_MIPS_R10	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31	(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC		(KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
 
 /*
  * CP0_Count control
@@ -118,8 +126,7 @@
  *        safely without losing time or guest timer interrupts.
  * Other: Reserved, do not change.
  */
-#define KVM_REG_MIPS_COUNT_CTL		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL	    (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
 #define KVM_REG_MIPS_COUNT_CTL_DC	0x00000001
 
 /*
@@ -131,15 +138,46 @@
  * emulated.
  * Modifications to times in the future are rejected.
  */
-#define KVM_REG_MIPS_COUNT_RESUME	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 1)
+#define KVM_REG_MIPS_COUNT_RESUME   (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
 /*
  * CP0_Count rate in Hz
  * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
  * discontinuities in CP0_Count.
  */
-#define KVM_REG_MIPS_COUNT_HZ		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-					 0x20000 | 2)
+#define KVM_REG_MIPS_COUNT_HZ	    (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ *  bits[15..8]  - Register subset (see definitions below).
+ *  bits[7..5]   - Must be zero.
+ *  bits[4..0]   - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR	(KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR	(KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR	(KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32  | (n))
+#define KVM_REG_MIPS_FPR_64(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64  | (n))
+#define KVM_REG_MIPS_VEC_128(n)	(KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR	(KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_FCR_CSR	(KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR	 (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_MSA_CSR	 (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  1)
+
 
 /*
  * KVM MIPS specific structures and definitions
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 750d67a..e59fd7c 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -167,72 +167,6 @@
 	OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
 	OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
 
-	/* the least significant 64 bits of each FP register */
-	OFFSET(THREAD_FPR0_LS64, task_struct,
-	       thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR1_LS64, task_struct,
-	       thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR2_LS64, task_struct,
-	       thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR3_LS64, task_struct,
-	       thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR4_LS64, task_struct,
-	       thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR5_LS64, task_struct,
-	       thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR6_LS64, task_struct,
-	       thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR7_LS64, task_struct,
-	       thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR8_LS64, task_struct,
-	       thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR9_LS64, task_struct,
-	       thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR10_LS64, task_struct,
-	       thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR11_LS64, task_struct,
-	       thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR12_LS64, task_struct,
-	       thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR13_LS64, task_struct,
-	       thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR14_LS64, task_struct,
-	       thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR15_LS64, task_struct,
-	       thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR16_LS64, task_struct,
-	       thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR17_LS64, task_struct,
-	       thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR18_LS64, task_struct,
-	       thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR19_LS64, task_struct,
-	       thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR20_LS64, task_struct,
-	       thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR21_LS64, task_struct,
-	       thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR22_LS64, task_struct,
-	       thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR23_LS64, task_struct,
-	       thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR24_LS64, task_struct,
-	       thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR25_LS64, task_struct,
-	       thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR26_LS64, task_struct,
-	       thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR27_LS64, task_struct,
-	       thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR28_LS64, task_struct,
-	       thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR29_LS64, task_struct,
-	       thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR30_LS64, task_struct,
-	       thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
-	OFFSET(THREAD_FPR31_LS64, task_struct,
-	       thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
 	OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
 	OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
 	BLANK();
@@ -470,6 +404,45 @@
 	OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
 	OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
 	OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+	BLANK();
+
+	OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+	OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+	OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+	OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+	OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+	OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+	OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+	OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+	OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+	OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+	OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+	OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+	OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+	OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+	OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+	OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+	OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+	OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+	OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+	OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+	OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+	OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+	OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+	OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+	OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+	OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+	OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+	OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+	OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+	OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+	OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+	OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+	OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+	OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
+	BLANK();
+
 	OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
 	OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
 	OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 2ebaabe..af42e70 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -360,12 +360,15 @@
 	.set	mips1
 	SET_HARDFLOAT
 	cfc1	a1, fcr31
-	li	a2, ~(0x3f << 12)
-	and	a2, a1
-	ctc1	a2, fcr31
 	.set	pop
-	TRACE_IRQS_ON
-	STI
+	CLI
+	TRACE_IRQS_OFF
+	.endm
+
+	.macro	__build_clear_msa_fpe
+	_cfcmsa	a1, MSA_CSR
+	CLI
+	TRACE_IRQS_OFF
 	.endm
 
 	.macro	__build_clear_ade
@@ -426,7 +429,7 @@
 	BUILD_HANDLER cpu cpu sti silent		/* #11 */
 	BUILD_HANDLER ov ov sti silent			/* #12 */
 	BUILD_HANDLER tr tr sti silent			/* #13 */
-	BUILD_HANDLER msa_fpe msa_fpe sti silent	/* #14 */
+	BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent	/* #14 */
 	BUILD_HANDLER fpe fpe fpe silent		/* #15 */
 	BUILD_HANDLER ftlb ftlb none silent		/* #16 */
 	BUILD_HANDLER msa msa sti silent		/* #21 */
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 5104528..7da6e32 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -46,6 +46,26 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static void init_fp_ctx(struct task_struct *target)
+{
+	/* If FP has been used then the target already has context */
+	if (tsk_used_math(target))
+		return;
+
+	/* Begin with data registers set to all 1s... */
+	memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+	/* ...and FCSR zeroed */
+	target->thread.fpu.fcr31 = 0;
+
+	/*
+	 * Record that the target has "used" math, such that the context
+	 * just initialised, and any modifications made by the caller,
+	 * aren't discarded.
+	 */
+	set_stopped_child_used_math(target);
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -142,6 +162,7 @@
 	if (!access_ok(VERIFY_READ, data, 33 * 8))
 		return -EIO;
 
+	init_fp_ctx(child);
 	fregs = get_fpu_regs(child);
 
 	for (i = 0; i < 32; i++) {
@@ -439,6 +460,8 @@
 
 	/* XXX fcr31  */
 
+	init_fp_ctx(target);
+
 	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &target->thread.fpu,
@@ -660,12 +683,7 @@
 		case FPR_BASE ... FPR_BASE + 31: {
 			union fpureg *fregs = get_fpu_regs(child);
 
-			if (!tsk_used_math(child)) {
-				/* FP not yet used  */
-				memset(&child->thread.fpu, ~0,
-				       sizeof(child->thread.fpu));
-				child->thread.fpu.fcr31 = 0;
-			}
+			init_fp_ctx(child);
 #ifdef CONFIG_32BIT
 			if (test_thread_flag(TIF_32BIT_FPREGS)) {
 				/*
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 676c503..1d88af2 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -34,7 +34,6 @@
 	.endm
 
 	.set	noreorder
-	.set	MIPS_ISA_ARCH_LEVEL_RAW
 
 LEAF(_save_fp_context)
 	.set	push
@@ -103,6 +102,7 @@
 	/* Save 32-bit process floating point context */
 LEAF(_save_fp_context32)
 	.set push
+	.set MIPS_ISA_ARCH_LEVEL_RAW
 	SET_HARDFLOAT
 	cfc1	t1, fcr31
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 33984c0..5b4d711 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -701,6 +701,13 @@
 
 int process_fpemu_return(int sig, void __user *fault_addr)
 {
+	/*
+	 * We can't allow the emulated instruction to leave any of the cause
+	 * bits set in FCSR. If they were then the kernel would take an FP
+	 * exception when restoring FP context.
+	 */
+	current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
 	if (sig == SIGSEGV || sig == SIGBUS) {
 		struct siginfo si = {0};
 		si.si_addr = fault_addr;
@@ -781,6 +788,11 @@
 	if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs),
 		       SIGFPE) == NOTIFY_STOP)
 		goto out;
+
+	/* Clear FCSR.Cause before enabling interrupts */
+	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+	local_irq_enable();
+
 	die_if_kernel("FP exception in kernel code", regs);
 
 	if (fcr31 & FPU_CSR_UNI_X) {
@@ -804,18 +816,12 @@
 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 					       &fault_addr);
 
-		/*
-		 * We can't allow the emulated instruction to leave any of
-		 * the cause bit set in $fcr31.
-		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		/* If something went wrong, signal */
+		process_fpemu_return(sig, fault_addr);
 
 		/* Restore the hardware register state */
 		own_fpu(1);	/* Using the FPU again.	 */
 
-		/* If something went wrong, signal */
-		process_fpemu_return(sig, fault_addr);
-
 		goto out;
 	} else if (fcr31 & FPU_CSR_INV_X)
 		info.si_code = FPE_FLTINV;
@@ -1392,13 +1398,22 @@
 	exception_exit(prev_state);
 }
 
-asmlinkage void do_msa_fpe(struct pt_regs *regs)
+asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
 {
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
+	if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0,
+		       regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP)
+		goto out;
+
+	/* Clear MSACSR.Cause before enabling interrupts */
+	write_msa_csr(msacsr & ~MSA_CSR_CAUSEF);
+	local_irq_enable();
+
 	die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
 	force_sig(SIGFPE, current);
+out:
 	exception_exit(prev_state);
 }
 
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 401fe02..637ebbe 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -1,13 +1,15 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
-kvm-objs := $(common-objs) mips.o emulate.o locore.o \
+common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+
+kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
 	    interrupt.o stats.o commpage.o \
-	    dyntrans.o trap_emul.o
+	    dyntrans.o trap_emul.o fpu.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-y			+= callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index fb3e8df..6230f37 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -884,6 +884,84 @@
 	return EMULATE_DONE;
 }
 
+/**
+ * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config1 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int mask = 0;
+
+	/* Permit FPU to be present if FPU is supported */
+	if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+		mask |= MIPS_CONF1_FP;
+
+	return mask;
+}
+
+/**
+ * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config3 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
+{
+	/* Config4 is optional */
+	unsigned int mask = MIPS_CONF_M;
+
+	/* Permit MSA to be present if MSA is supported */
+	if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+		mask |= MIPS_CONF3_MSA;
+
+	return mask;
+}
+
+/**
+ * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config4 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
+{
+	/* Config5 is optional */
+	return MIPS_CONF_M;
+}
+
+/**
+ * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5
+ * @vcpu:	Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config5 CP0
+ * register, by the guest itself.
+ */
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
+{
+	unsigned int mask = 0;
+
+	/* Permit MSAEn changes if MSA supported and enabled */
+	if (kvm_mips_guest_has_msa(&vcpu->arch))
+		mask |= MIPS_CONF5_MSAEN;
+
+	/*
+	 * Permit guest FPU mode changes if FPU is enabled and the relevant
+	 * feature exists according to FIR register.
+	 */
+	if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+		if (cpu_has_fre)
+			mask |= MIPS_CONF5_FRE;
+		/* We don't support UFR or UFE */
+	}
+
+	return mask;
+}
+
 enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 					   uint32_t cause, struct kvm_run *run,
 					   struct kvm_vcpu *vcpu)
@@ -1021,18 +1099,114 @@
 				kvm_mips_write_compare(vcpu,
 						       vcpu->arch.gprs[rt]);
 			} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
-				kvm_write_c0_guest_status(cop0,
-							  vcpu->arch.gprs[rt]);
+				unsigned int old_val, val, change;
+
+				old_val = kvm_read_c0_guest_status(cop0);
+				val = vcpu->arch.gprs[rt];
+				change = val ^ old_val;
+
+				/* Make sure that the NMI bit is never set */
+				val &= ~ST0_NMI;
+
 				/*
-				 * Make sure that CU1 and NMI bits are
-				 * never set
+				 * Don't allow CU1 or FR to be set unless FPU
+				 * capability enabled and exists in guest
+				 * configuration.
 				 */
-				kvm_clear_c0_guest_status(cop0,
-							  (ST0_CU1 | ST0_NMI));
+				if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+					val &= ~(ST0_CU1 | ST0_FR);
+
+				/*
+				 * Also don't allow FR to be set if host doesn't
+				 * support it.
+				 */
+				if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+					val &= ~ST0_FR;
+
+
+				/* Handle changes in FPU mode */
+				preempt_disable();
+
+				/*
+				 * FPU and Vector register state is made
+				 * UNPREDICTABLE by a change of FR, so don't
+				 * even bother saving it.
+				 */
+				if (change & ST0_FR)
+					kvm_drop_fpu(vcpu);
+
+				/*
+				 * If MSA state is already live, it is undefined
+				 * how it interacts with FR=0 FPU state, and we
+				 * don't want to hit reserved instruction
+				 * exceptions trying to save the MSA state later
+				 * when CU=1 && FR=1, so play it safe and save
+				 * it first.
+				 */
+				if (change & ST0_CU1 && !(val & ST0_FR) &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+					kvm_lose_fpu(vcpu);
+
+				/*
+				 * Propagate CU1 (FPU enable) changes
+				 * immediately if the FPU context is already
+				 * loaded. When disabling we leave the context
+				 * loaded so it can be quickly enabled again in
+				 * the near future.
+				 */
+				if (change & ST0_CU1 &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+					change_c0_status(ST0_CU1, val);
+
+				preempt_enable();
+
+				kvm_write_c0_guest_status(cop0, val);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
-				kvm_mips_trans_mtc0(inst, opc, vcpu);
+				/*
+				 * If FPU present, we need CU1/FR bits to take
+				 * effect fairly soon.
+				 */
+				if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+					kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
+			} else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+				unsigned int old_val, val, change, wrmask;
+
+				old_val = kvm_read_c0_guest_config5(cop0);
+				val = vcpu->arch.gprs[rt];
+
+				/* Only a few bits are writable in Config5 */
+				wrmask = kvm_mips_config5_wrmask(vcpu);
+				change = (val ^ old_val) & wrmask;
+				val = old_val ^ change;
+
+
+				/* Handle changes in FPU/MSA modes */
+				preempt_disable();
+
+				/*
+				 * Propagate FRE changes immediately if the FPU
+				 * context is already loaded.
+				 */
+				if (change & MIPS_CONF5_FRE &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+					change_c0_config5(MIPS_CONF5_FRE, val);
+
+				/*
+				 * Propagate MSAEn changes immediately if the
+				 * MSA context is already loaded. When disabling
+				 * we leave the context loaded so it can be
+				 * quickly enabled again in the near future.
+				 */
+				if (change & MIPS_CONF5_MSAEN &&
+				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+					change_c0_config5(MIPS_CONF5_MSAEN,
+							  val);
+
+				preempt_enable();
+
+				kvm_write_c0_guest_config5(cop0, val);
 			} else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
 				uint32_t old_cause, new_cause;
 
@@ -1970,6 +2144,146 @@
 	return er;
 }
 
+enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+						uint32_t *opc,
+						struct kvm_run *run,
+						struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_TRAP << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver TRAP when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+						  uint32_t *opc,
+						  struct kvm_run *run,
+						  struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_MSAFPE << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+					       uint32_t *opc,
+					       struct kvm_run *run,
+					       struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_FPE << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver FPE when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
+enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+						  uint32_t *opc,
+						  struct kvm_run *run,
+						  struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_vcpu_arch *arch = &vcpu->arch;
+	enum emulation_result er = EMULATE_DONE;
+
+	if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+		/* save old pc */
+		kvm_write_c0_guest_epc(cop0, arch->pc);
+		kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+		if (cause & CAUSEF_BD)
+			kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+		else
+			kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+		kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
+
+		kvm_change_c0_guest_cause(cop0, (0xff),
+					  (T_MSADIS << CAUSEB_EXCCODE));
+
+		/* Set PC to the exception entry point */
+		arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+	} else {
+		kvm_err("Trying to deliver MSADIS when EXL is already set\n");
+		er = EMULATE_FAIL;
+	}
+
+	return er;
+}
+
 /* ll/sc, rdhwr, sync emulation */
 
 #define OPCODE 0xfc000000
@@ -2176,6 +2490,10 @@
 		case T_SYSCALL:
 		case T_BREAK:
 		case T_RES_INST:
+		case T_TRAP:
+		case T_MSAFPE:
+		case T_FPE:
+		case T_MSADIS:
 			break;
 
 		case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
new file mode 100644
index 0000000..531fbf5
--- /dev/null
+++ b/arch/mips/kvm/fpu.S
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+	.set	noreorder
+	.set	noat
+
+LEAF(__kvm_save_fpu)
+	.set	push
+	.set	mips64r2
+	SET_HARDFLOAT
+	mfc0	t0, CP0_STATUS
+	sll     t0, t0, 5			# is Status.FR set?
+	bgez    t0, 1f				# no: skip odd doubles
+	 nop
+	sdc1	$f1,  VCPU_FPR1(a0)
+	sdc1	$f3,  VCPU_FPR3(a0)
+	sdc1	$f5,  VCPU_FPR5(a0)
+	sdc1	$f7,  VCPU_FPR7(a0)
+	sdc1	$f9,  VCPU_FPR9(a0)
+	sdc1	$f11, VCPU_FPR11(a0)
+	sdc1	$f13, VCPU_FPR13(a0)
+	sdc1	$f15, VCPU_FPR15(a0)
+	sdc1	$f17, VCPU_FPR17(a0)
+	sdc1	$f19, VCPU_FPR19(a0)
+	sdc1	$f21, VCPU_FPR21(a0)
+	sdc1	$f23, VCPU_FPR23(a0)
+	sdc1	$f25, VCPU_FPR25(a0)
+	sdc1	$f27, VCPU_FPR27(a0)
+	sdc1	$f29, VCPU_FPR29(a0)
+	sdc1	$f31, VCPU_FPR31(a0)
+1:	sdc1	$f0,  VCPU_FPR0(a0)
+	sdc1	$f2,  VCPU_FPR2(a0)
+	sdc1	$f4,  VCPU_FPR4(a0)
+	sdc1	$f6,  VCPU_FPR6(a0)
+	sdc1	$f8,  VCPU_FPR8(a0)
+	sdc1	$f10, VCPU_FPR10(a0)
+	sdc1	$f12, VCPU_FPR12(a0)
+	sdc1	$f14, VCPU_FPR14(a0)
+	sdc1	$f16, VCPU_FPR16(a0)
+	sdc1	$f18, VCPU_FPR18(a0)
+	sdc1	$f20, VCPU_FPR20(a0)
+	sdc1	$f22, VCPU_FPR22(a0)
+	sdc1	$f24, VCPU_FPR24(a0)
+	sdc1	$f26, VCPU_FPR26(a0)
+	sdc1	$f28, VCPU_FPR28(a0)
+	jr	ra
+	 sdc1	$f30, VCPU_FPR30(a0)
+	.set	pop
+	END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)
+	.set	push
+	.set	mips64r2
+	SET_HARDFLOAT
+	mfc0	t0, CP0_STATUS
+	sll     t0, t0, 5			# is Status.FR set?
+	bgez    t0, 1f				# no: skip odd doubles
+	 nop
+	ldc1	$f1,  VCPU_FPR1(a0)
+	ldc1	$f3,  VCPU_FPR3(a0)
+	ldc1	$f5,  VCPU_FPR5(a0)
+	ldc1	$f7,  VCPU_FPR7(a0)
+	ldc1	$f9,  VCPU_FPR9(a0)
+	ldc1	$f11, VCPU_FPR11(a0)
+	ldc1	$f13, VCPU_FPR13(a0)
+	ldc1	$f15, VCPU_FPR15(a0)
+	ldc1	$f17, VCPU_FPR17(a0)
+	ldc1	$f19, VCPU_FPR19(a0)
+	ldc1	$f21, VCPU_FPR21(a0)
+	ldc1	$f23, VCPU_FPR23(a0)
+	ldc1	$f25, VCPU_FPR25(a0)
+	ldc1	$f27, VCPU_FPR27(a0)
+	ldc1	$f29, VCPU_FPR29(a0)
+	ldc1	$f31, VCPU_FPR31(a0)
+1:	ldc1	$f0,  VCPU_FPR0(a0)
+	ldc1	$f2,  VCPU_FPR2(a0)
+	ldc1	$f4,  VCPU_FPR4(a0)
+	ldc1	$f6,  VCPU_FPR6(a0)
+	ldc1	$f8,  VCPU_FPR8(a0)
+	ldc1	$f10, VCPU_FPR10(a0)
+	ldc1	$f12, VCPU_FPR12(a0)
+	ldc1	$f14, VCPU_FPR14(a0)
+	ldc1	$f16, VCPU_FPR16(a0)
+	ldc1	$f18, VCPU_FPR18(a0)
+	ldc1	$f20, VCPU_FPR20(a0)
+	ldc1	$f22, VCPU_FPR22(a0)
+	ldc1	$f24, VCPU_FPR24(a0)
+	ldc1	$f26, VCPU_FPR26(a0)
+	ldc1	$f28, VCPU_FPR28(a0)
+	jr	ra
+	 ldc1	$f30, VCPU_FPR30(a0)
+	.set	pop
+	END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)
+	.set	push
+	SET_HARDFLOAT
+	lw	t0, VCPU_FCR31(a0)
+	/*
+	 * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+	 * See kvm_mips_csr_die_notify() which handles t0 containing a value
+	 * which triggers an FP Exception, which must be stepped over and
+	 * ignored since the set cause bits must remain there for the guest.
+	 */
+	ctc1	t0, fcr31
+	jr	ra
+	 nop
+	.set	pop
+	END(__kvm_restore_fcsr)
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 4a68b17..c567240 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -36,6 +36,8 @@
 #define PT_HOST_USERLOCAL   PT_EPC
 
 #define CP0_DDATA_LO        $28,3
+#define CP0_CONFIG3         $16,3
+#define CP0_CONFIG5         $16,5
 #define CP0_EBASE           $15,1
 
 #define CP0_INTCTL          $12,1
@@ -353,6 +355,42 @@
 	LONG_L	k0, VCPU_HOST_EBASE(k1)
 	mtc0	k0,CP0_EBASE
 
+	/*
+	 * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
+	 * trigger FPE for pending exceptions.
+	 */
+	.set	at
+	and	v1, v0, ST0_CU1
+	beqz	v1, 1f
+	 nop
+	.set	push
+	SET_HARDFLOAT
+	cfc1	t0, fcr31
+	sw	t0, VCPU_FCR31(k1)
+	ctc1	zero,fcr31
+	.set	pop
+	.set	noat
+1:
+
+#ifdef CONFIG_CPU_HAS_MSA
+	/*
+	 * If MSA is enabled, save MSACSR and clear it so that later
+	 * instructions don't trigger MSAFPE for pending exceptions.
+	 */
+	mfc0	t0, CP0_CONFIG3
+	ext	t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
+	beqz	t0, 1f
+	 nop
+	mfc0	t0, CP0_CONFIG5
+	ext	t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
+	beqz	t0, 1f
+	 nop
+	_cfcmsa	t0, MSA_CSR
+	sw	t0, VCPU_MSA_CSR(k1)
+	_ctcmsa	MSA_CSR, zero
+1:
+#endif
+
 	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
 	.set	at
 	and	v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index c9eccf5..bb68e8d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -11,6 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -48,6 +49,10 @@
 	{ "syscall",	  VCPU_STAT(syscall_exits),	 KVM_STAT_VCPU },
 	{ "resvd_inst",	  VCPU_STAT(resvd_inst_exits),	 KVM_STAT_VCPU },
 	{ "break_inst",	  VCPU_STAT(break_inst_exits),	 KVM_STAT_VCPU },
+	{ "trap_inst",	  VCPU_STAT(trap_inst_exits),	 KVM_STAT_VCPU },
+	{ "msa_fpe",	  VCPU_STAT(msa_fpe_exits),	 KVM_STAT_VCPU },
+	{ "fpe",	  VCPU_STAT(fpe_exits),		 KVM_STAT_VCPU },
+	{ "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
 	{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
 	{ "halt_wakeup",  VCPU_STAT(halt_wakeup),	 KVM_STAT_VCPU },
@@ -504,10 +509,13 @@
 	KVM_REG_MIPS_CP0_STATUS,
 	KVM_REG_MIPS_CP0_CAUSE,
 	KVM_REG_MIPS_CP0_EPC,
+	KVM_REG_MIPS_CP0_PRID,
 	KVM_REG_MIPS_CP0_CONFIG,
 	KVM_REG_MIPS_CP0_CONFIG1,
 	KVM_REG_MIPS_CP0_CONFIG2,
 	KVM_REG_MIPS_CP0_CONFIG3,
+	KVM_REG_MIPS_CP0_CONFIG4,
+	KVM_REG_MIPS_CP0_CONFIG5,
 	KVM_REG_MIPS_CP0_CONFIG7,
 	KVM_REG_MIPS_CP0_ERROREPC,
 
@@ -520,10 +528,14 @@
 			    const struct kvm_one_reg *reg)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
 	int ret;
 	s64 v;
+	s64 vs[2];
+	unsigned int idx;
 
 	switch (reg->id) {
+	/* General purpose registers */
 	case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
 		v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
 		break;
@@ -537,6 +549,67 @@
 		v = (long)vcpu->arch.pc;
 		break;
 
+	/* Floating point registers */
+	case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+		/* Odd singles in top of even double when FR=0 */
+		if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+			v = get_fpr32(&fpu->fpr[idx], 0);
+		else
+			v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1);
+		break;
+	case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+		/* Can't access odd doubles in FR=0 mode */
+		if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		v = get_fpr64(&fpu->fpr[idx], 0);
+		break;
+	case KVM_REG_MIPS_FCR_IR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		v = boot_cpu_data.fpu_id;
+		break;
+	case KVM_REG_MIPS_FCR_CSR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		v = fpu->fcr31;
+		break;
+
+	/* MIPS SIMD Architecture (MSA) registers */
+	case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		/* Can't access MSA registers in FR=0 mode */
+		if (!(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+		/* least significant byte first */
+		vs[0] = get_fpr64(&fpu->fpr[idx], 0);
+		vs[1] = get_fpr64(&fpu->fpr[idx], 1);
+#else
+		/* most significant byte first */
+		vs[0] = get_fpr64(&fpu->fpr[idx], 1);
+		vs[1] = get_fpr64(&fpu->fpr[idx], 0);
+#endif
+		break;
+	case KVM_REG_MIPS_MSA_IR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		v = boot_cpu_data.msa_id;
+		break;
+	case KVM_REG_MIPS_MSA_CSR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		v = fpu->msacsr;
+		break;
+
+	/* Co-processor 0 registers */
 	case KVM_REG_MIPS_CP0_INDEX:
 		v = (long)kvm_read_c0_guest_index(cop0);
 		break;
@@ -573,8 +646,8 @@
 	case KVM_REG_MIPS_CP0_EPC:
 		v = (long)kvm_read_c0_guest_epc(cop0);
 		break;
-	case KVM_REG_MIPS_CP0_ERROREPC:
-		v = (long)kvm_read_c0_guest_errorepc(cop0);
+	case KVM_REG_MIPS_CP0_PRID:
+		v = (long)kvm_read_c0_guest_prid(cop0);
 		break;
 	case KVM_REG_MIPS_CP0_CONFIG:
 		v = (long)kvm_read_c0_guest_config(cop0);
@@ -588,9 +661,18 @@
 	case KVM_REG_MIPS_CP0_CONFIG3:
 		v = (long)kvm_read_c0_guest_config3(cop0);
 		break;
+	case KVM_REG_MIPS_CP0_CONFIG4:
+		v = (long)kvm_read_c0_guest_config4(cop0);
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG5:
+		v = (long)kvm_read_c0_guest_config5(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_CONFIG7:
 		v = (long)kvm_read_c0_guest_config7(cop0);
 		break;
+	case KVM_REG_MIPS_CP0_ERROREPC:
+		v = (long)kvm_read_c0_guest_errorepc(cop0);
+		break;
 	/* registers to be handled specially */
 	case KVM_REG_MIPS_CP0_COUNT:
 	case KVM_REG_MIPS_COUNT_CTL:
@@ -612,6 +694,10 @@
 		u32 v32 = (u32)v;
 
 		return put_user(v32, uaddr32);
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+		void __user *uaddr = (void __user *)(long)reg->addr;
+
+		return copy_to_user(uaddr, vs, 16);
 	} else {
 		return -EINVAL;
 	}
@@ -621,7 +707,10 @@
 			    const struct kvm_one_reg *reg)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	u64 v;
+	struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+	s64 v;
+	s64 vs[2];
+	unsigned int idx;
 
 	if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
 		u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -635,11 +724,16 @@
 		if (get_user(v32, uaddr32) != 0)
 			return -EFAULT;
 		v = (s64)v32;
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+		void __user *uaddr = (void __user *)(long)reg->addr;
+
+		return copy_from_user(vs, uaddr, 16);
 	} else {
 		return -EINVAL;
 	}
 
 	switch (reg->id) {
+	/* General purpose registers */
 	case KVM_REG_MIPS_R0:
 		/* Silently ignore requests to set $0 */
 		break;
@@ -656,6 +750,64 @@
 		vcpu->arch.pc = v;
 		break;
 
+	/* Floating point registers */
+	case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+		/* Odd singles in top of even double when FR=0 */
+		if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+			set_fpr32(&fpu->fpr[idx], 0, v);
+		else
+			set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v);
+		break;
+	case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+		/* Can't access odd doubles in FR=0 mode */
+		if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+			return -EINVAL;
+		set_fpr64(&fpu->fpr[idx], 0, v);
+		break;
+	case KVM_REG_MIPS_FCR_IR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		/* Read-only */
+		break;
+	case KVM_REG_MIPS_FCR_CSR:
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+			return -EINVAL;
+		fpu->fcr31 = v;
+		break;
+
+	/* MIPS SIMD Architecture (MSA) registers */
+	case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+		/* least significant byte first */
+		set_fpr64(&fpu->fpr[idx], 0, vs[0]);
+		set_fpr64(&fpu->fpr[idx], 1, vs[1]);
+#else
+		/* most significant byte first */
+		set_fpr64(&fpu->fpr[idx], 1, vs[0]);
+		set_fpr64(&fpu->fpr[idx], 0, vs[1]);
+#endif
+		break;
+	case KVM_REG_MIPS_MSA_IR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		/* Read-only */
+		break;
+	case KVM_REG_MIPS_MSA_CSR:
+		if (!kvm_mips_guest_has_msa(&vcpu->arch))
+			return -EINVAL;
+		fpu->msacsr = v;
+		break;
+
+	/* Co-processor 0 registers */
 	case KVM_REG_MIPS_CP0_INDEX:
 		kvm_write_c0_guest_index(cop0, v);
 		break;
@@ -686,6 +838,9 @@
 	case KVM_REG_MIPS_CP0_EPC:
 		kvm_write_c0_guest_epc(cop0, v);
 		break;
+	case KVM_REG_MIPS_CP0_PRID:
+		kvm_write_c0_guest_prid(cop0, v);
+		break;
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		kvm_write_c0_guest_errorepc(cop0, v);
 		break;
@@ -693,6 +848,12 @@
 	case KVM_REG_MIPS_CP0_COUNT:
 	case KVM_REG_MIPS_CP0_COMPARE:
 	case KVM_REG_MIPS_CP0_CAUSE:
+	case KVM_REG_MIPS_CP0_CONFIG:
+	case KVM_REG_MIPS_CP0_CONFIG1:
+	case KVM_REG_MIPS_CP0_CONFIG2:
+	case KVM_REG_MIPS_CP0_CONFIG3:
+	case KVM_REG_MIPS_CP0_CONFIG4:
+	case KVM_REG_MIPS_CP0_CONFIG5:
 	case KVM_REG_MIPS_COUNT_CTL:
 	case KVM_REG_MIPS_COUNT_RESUME:
 	case KVM_REG_MIPS_COUNT_HZ:
@@ -703,6 +864,33 @@
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+		return -EINVAL;
+	if (cap->flags)
+		return -EINVAL;
+	if (cap->args[0])
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_MIPS_FPU:
+		vcpu->arch.fpu_enabled = true;
+		break;
+	case KVM_CAP_MIPS_MSA:
+		vcpu->arch.msa_enabled = true;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			 unsigned long arg)
 {
@@ -760,6 +948,15 @@
 			r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
 			break;
 		}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
 	default:
 		r = -ENOIOCTLCMD;
 	}
@@ -868,11 +1065,30 @@
 
 	switch (ext) {
 	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_MIPS_FPU:
+		r = !!cpu_has_fpu;
+		break;
+	case KVM_CAP_MIPS_MSA:
+		/*
+		 * We don't support MSA vector partitioning yet:
+		 * 1) It would require explicit support which can't be tested
+		 *    yet due to lack of support in current hardware.
+		 * 2) It extends the state that would need to be saved/restored
+		 *    by e.g. QEMU for migration.
+		 *
+		 * When vector partitioning hardware becomes available, support
+		 * could be added by requiring a flag when enabling
+		 * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+		 * to save/restore the appropriate extra state.
+		 */
+		r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+		break;
 	default:
 		r = 0;
 		break;
@@ -1119,6 +1335,30 @@
 		ret = kvm_mips_callbacks->handle_break(vcpu);
 		break;
 
+	case T_TRAP:
+		++vcpu->stat.trap_inst_exits;
+		trace_kvm_exit(vcpu, TRAP_INST_EXITS);
+		ret = kvm_mips_callbacks->handle_trap(vcpu);
+		break;
+
+	case T_MSAFPE:
+		++vcpu->stat.msa_fpe_exits;
+		trace_kvm_exit(vcpu, MSA_FPE_EXITS);
+		ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
+		break;
+
+	case T_FPE:
+		++vcpu->stat.fpe_exits;
+		trace_kvm_exit(vcpu, FPE_EXITS);
+		ret = kvm_mips_callbacks->handle_fpe(vcpu);
+		break;
+
+	case T_MSADIS:
+		++vcpu->stat.msa_disabled_exits;
+		trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
+		ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+		break;
+
 	default:
 		kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x  BadVaddr: %#lx Status: %#lx\n",
 			exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
@@ -1146,12 +1386,233 @@
 		}
 	}
 
+	if (ret == RESUME_GUEST) {
+		/*
+		 * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
+		 * is live), restore FCR31 / MSACSR.
+		 *
+		 * This should be before returning to the guest exception
+		 * vector, as it may well cause an [MSA] FP exception if there
+		 * are pending exception bits unmasked. (see
+		 * kvm_mips_csr_die_notifier() for how that is handled).
+		 */
+		if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+		    read_c0_status() & ST0_CU1)
+			__kvm_restore_fcsr(&vcpu->arch);
+
+		if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+		    read_c0_config5() & MIPS_CONF5_MSAEN)
+			__kvm_restore_msacsr(&vcpu->arch);
+	}
+
 	/* Disable HTW before returning to guest or host */
 	htw_stop();
 
 	return ret;
 }
 
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned int sr, cfg5;
+
+	preempt_disable();
+
+	sr = kvm_read_c0_guest_status(cop0);
+
+	/*
+	 * If MSA state is already live, it is undefined how it interacts with
+	 * FR=0 FPU state, and we don't want to hit reserved instruction
+	 * exceptions trying to save the MSA state later when CU=1 && FR=1, so
+	 * play it safe and save it first.
+	 *
+	 * In theory we shouldn't ever hit this case since kvm_lose_fpu() should
+	 * get called when guest CU1 is set, however we can't trust the guest
+	 * not to clobber the status register directly via the commpage.
+	 */
+	if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
+	    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+		kvm_lose_fpu(vcpu);
+
+	/*
+	 * Enable FPU for guest
+	 * We set FR and FRE according to guest context
+	 */
+	change_c0_status(ST0_CU1 | ST0_FR, sr);
+	if (cpu_has_fre) {
+		cfg5 = kvm_read_c0_guest_config5(cop0);
+		change_c0_config5(MIPS_CONF5_FRE, cfg5);
+	}
+	enable_fpu_hazard();
+
+	/* If guest FPU state not active, restore it now */
+	if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+		__kvm_restore_fpu(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+	}
+
+	preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_MSA
+/* Enable MSA for guest and restore context */
+void kvm_own_msa(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned int sr, cfg5;
+
+	preempt_disable();
+
+	/*
+	 * Enable FPU if enabled in guest, since we're restoring FPU context
+	 * anyway. We set FR and FRE according to guest context.
+	 */
+	if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+		sr = kvm_read_c0_guest_status(cop0);
+
+		/*
+		 * If FR=0 FPU state is already live, it is undefined how it
+		 * interacts with MSA state, so play it safe and save it first.
+		 */
+		if (!(sr & ST0_FR) &&
+		    (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
+				KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+			kvm_lose_fpu(vcpu);
+
+		change_c0_status(ST0_CU1 | ST0_FR, sr);
+		if (sr & ST0_CU1 && cpu_has_fre) {
+			cfg5 = kvm_read_c0_guest_config5(cop0);
+			change_c0_config5(MIPS_CONF5_FRE, cfg5);
+		}
+	}
+
+	/* Enable MSA for guest */
+	set_c0_config5(MIPS_CONF5_MSAEN);
+	enable_fpu_hazard();
+
+	switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
+	case KVM_MIPS_FPU_FPU:
+		/*
+		 * Guest FPU state already loaded, only restore upper MSA state
+		 */
+		__kvm_restore_msa_upper(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		break;
+	case 0:
+		/* Neither FPU or MSA already active, restore full MSA state */
+		__kvm_restore_msa(&vcpu->arch);
+		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		if (kvm_mips_guest_has_fpu(&vcpu->arch))
+			vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+		break;
+	default:
+		break;
+	}
+
+	preempt_enable();
+}
+#endif
+
+/* Drop FPU & MSA without saving it */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+		disable_msa();
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+	}
+	if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		clear_c0_status(ST0_CU1 | ST0_FR);
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+	}
+	preempt_enable();
+}
+
+/* Save and disable FPU & MSA */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * FPU & MSA get disabled in root context (hardware) when it is disabled
+	 * in guest context (software), but the register state in the hardware
+	 * may still be in use. This is why we explicitly re-enable the hardware
+	 * before saving.
+	 */
+
+	preempt_disable();
+	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+		set_c0_config5(MIPS_CONF5_MSAEN);
+		enable_fpu_hazard();
+
+		__kvm_save_msa(&vcpu->arch);
+
+		/* Disable MSA & FPU */
+		disable_msa();
+		if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+			clear_c0_status(ST0_CU1 | ST0_FR);
+		vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
+	} else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		set_c0_status(ST0_CU1);
+		enable_fpu_hazard();
+
+		__kvm_save_fpu(&vcpu->arch);
+		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+		/* Disable FPU */
+		clear_c0_status(ST0_CU1 | ST0_FR);
+	}
+	preempt_enable();
+}
+
+/*
+ * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are
+ * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP
+ * exception if cause bits are set in the value being written.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+				   unsigned long cmd, void *ptr)
+{
+	struct die_args *args = (struct die_args *)ptr;
+	struct pt_regs *regs = args->regs;
+	unsigned long pc;
+
+	/* Only interested in FPE and MSAFPE */
+	if (cmd != DIE_FP && cmd != DIE_MSAFP)
+		return NOTIFY_DONE;
+
+	/* Return immediately if guest context isn't active */
+	if (!(current->flags & PF_VCPU))
+		return NOTIFY_DONE;
+
+	/* Should never get here from user mode */
+	BUG_ON(user_mode(regs));
+
+	pc = instruction_pointer(regs);
+	switch (cmd) {
+	case DIE_FP:
+		/* match 2nd instruction in __kvm_restore_fcsr */
+		if (pc != (unsigned long)&__kvm_restore_fcsr + 4)
+			return NOTIFY_DONE;
+		break;
+	case DIE_MSAFP:
+		/* match 2nd/3rd instruction in __kvm_restore_msacsr */
+		if (!cpu_has_msa ||
+		    pc < (unsigned long)&__kvm_restore_msacsr + 4 ||
+		    pc > (unsigned long)&__kvm_restore_msacsr + 8)
+			return NOTIFY_DONE;
+		break;
+	}
+
+	/* Move PC forward a little and continue executing */
+	instruction_pointer(regs) += 4;
+
+	return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+	.notifier_call = kvm_mips_csr_die_notify,
+};
+
 int __init kvm_mips_init(void)
 {
 	int ret;
@@ -1161,6 +1622,8 @@
 	if (ret)
 		return ret;
 
+	register_die_notifier(&kvm_mips_csr_die_notifier);
+
 	/*
 	 * On MIPS, kernel modules are executed from "mapped space", which
 	 * requires TLBs. The TLB handling code is statically linked with
@@ -1173,7 +1636,6 @@
 	kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
 	kvm_mips_is_error_pfn = is_error_pfn;
 
-	pr_info("KVM/MIPS Initialized\n");
 	return 0;
 }
 
@@ -1185,7 +1647,7 @@
 	kvm_mips_release_pfn_clean = NULL;
 	kvm_mips_is_error_pfn = NULL;
 
-	pr_info("KVM/MIPS unloaded\n");
+	unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
 module_init(kvm_mips_init);
diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S
new file mode 100644
index 0000000..d02f0c6
--- /dev/null
+++ b/arch/mips/kvm/msa.S
@@ -0,0 +1,161 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * MIPS SIMD Architecture (MSA) context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+
+	.set	noreorder
+	.set	noat
+
+LEAF(__kvm_save_msa)
+	st_d	0,  VCPU_FPR0,  a0
+	st_d	1,  VCPU_FPR1,  a0
+	st_d	2,  VCPU_FPR2,  a0
+	st_d	3,  VCPU_FPR3,  a0
+	st_d	4,  VCPU_FPR4,  a0
+	st_d	5,  VCPU_FPR5,  a0
+	st_d	6,  VCPU_FPR6,  a0
+	st_d	7,  VCPU_FPR7,  a0
+	st_d	8,  VCPU_FPR8,  a0
+	st_d	9,  VCPU_FPR9,  a0
+	st_d	10, VCPU_FPR10, a0
+	st_d	11, VCPU_FPR11, a0
+	st_d	12, VCPU_FPR12, a0
+	st_d	13, VCPU_FPR13, a0
+	st_d	14, VCPU_FPR14, a0
+	st_d	15, VCPU_FPR15, a0
+	st_d	16, VCPU_FPR16, a0
+	st_d	17, VCPU_FPR17, a0
+	st_d	18, VCPU_FPR18, a0
+	st_d	19, VCPU_FPR19, a0
+	st_d	20, VCPU_FPR20, a0
+	st_d	21, VCPU_FPR21, a0
+	st_d	22, VCPU_FPR22, a0
+	st_d	23, VCPU_FPR23, a0
+	st_d	24, VCPU_FPR24, a0
+	st_d	25, VCPU_FPR25, a0
+	st_d	26, VCPU_FPR26, a0
+	st_d	27, VCPU_FPR27, a0
+	st_d	28, VCPU_FPR28, a0
+	st_d	29, VCPU_FPR29, a0
+	st_d	30, VCPU_FPR30, a0
+	st_d	31, VCPU_FPR31, a0
+	jr	ra
+	 nop
+	END(__kvm_save_msa)
+
+LEAF(__kvm_restore_msa)
+	ld_d	0,  VCPU_FPR0,  a0
+	ld_d	1,  VCPU_FPR1,  a0
+	ld_d	2,  VCPU_FPR2,  a0
+	ld_d	3,  VCPU_FPR3,  a0
+	ld_d	4,  VCPU_FPR4,  a0
+	ld_d	5,  VCPU_FPR5,  a0
+	ld_d	6,  VCPU_FPR6,  a0
+	ld_d	7,  VCPU_FPR7,  a0
+	ld_d	8,  VCPU_FPR8,  a0
+	ld_d	9,  VCPU_FPR9,  a0
+	ld_d	10, VCPU_FPR10, a0
+	ld_d	11, VCPU_FPR11, a0
+	ld_d	12, VCPU_FPR12, a0
+	ld_d	13, VCPU_FPR13, a0
+	ld_d	14, VCPU_FPR14, a0
+	ld_d	15, VCPU_FPR15, a0
+	ld_d	16, VCPU_FPR16, a0
+	ld_d	17, VCPU_FPR17, a0
+	ld_d	18, VCPU_FPR18, a0
+	ld_d	19, VCPU_FPR19, a0
+	ld_d	20, VCPU_FPR20, a0
+	ld_d	21, VCPU_FPR21, a0
+	ld_d	22, VCPU_FPR22, a0
+	ld_d	23, VCPU_FPR23, a0
+	ld_d	24, VCPU_FPR24, a0
+	ld_d	25, VCPU_FPR25, a0
+	ld_d	26, VCPU_FPR26, a0
+	ld_d	27, VCPU_FPR27, a0
+	ld_d	28, VCPU_FPR28, a0
+	ld_d	29, VCPU_FPR29, a0
+	ld_d	30, VCPU_FPR30, a0
+	ld_d	31, VCPU_FPR31, a0
+	jr	ra
+	 nop
+	END(__kvm_restore_msa)
+
+	.macro	kvm_restore_msa_upper	wr, off, base
+	.set	push
+	.set	noat
+#ifdef CONFIG_64BIT
+	ld	$1, \off(\base)
+	insert_d \wr, 1
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+	lw	$1, \off(\base)
+	insert_w \wr, 2
+	lw	$1, (\off+4)(\base)
+	insert_w \wr, 3
+#else /* CONFIG_CPU_BIG_ENDIAN */
+	lw	$1, (\off+4)(\base)
+	insert_w \wr, 2
+	lw	$1, \off(\base)
+	insert_w \wr, 3
+#endif
+	.set	pop
+	.endm
+
+LEAF(__kvm_restore_msa_upper)
+	kvm_restore_msa_upper	0,  VCPU_FPR0 +8, a0
+	kvm_restore_msa_upper	1,  VCPU_FPR1 +8, a0
+	kvm_restore_msa_upper	2,  VCPU_FPR2 +8, a0
+	kvm_restore_msa_upper	3,  VCPU_FPR3 +8, a0
+	kvm_restore_msa_upper	4,  VCPU_FPR4 +8, a0
+	kvm_restore_msa_upper	5,  VCPU_FPR5 +8, a0
+	kvm_restore_msa_upper	6,  VCPU_FPR6 +8, a0
+	kvm_restore_msa_upper	7,  VCPU_FPR7 +8, a0
+	kvm_restore_msa_upper	8,  VCPU_FPR8 +8, a0
+	kvm_restore_msa_upper	9,  VCPU_FPR9 +8, a0
+	kvm_restore_msa_upper	10, VCPU_FPR10+8, a0
+	kvm_restore_msa_upper	11, VCPU_FPR11+8, a0
+	kvm_restore_msa_upper	12, VCPU_FPR12+8, a0
+	kvm_restore_msa_upper	13, VCPU_FPR13+8, a0
+	kvm_restore_msa_upper	14, VCPU_FPR14+8, a0
+	kvm_restore_msa_upper	15, VCPU_FPR15+8, a0
+	kvm_restore_msa_upper	16, VCPU_FPR16+8, a0
+	kvm_restore_msa_upper	17, VCPU_FPR17+8, a0
+	kvm_restore_msa_upper	18, VCPU_FPR18+8, a0
+	kvm_restore_msa_upper	19, VCPU_FPR19+8, a0
+	kvm_restore_msa_upper	20, VCPU_FPR20+8, a0
+	kvm_restore_msa_upper	21, VCPU_FPR21+8, a0
+	kvm_restore_msa_upper	22, VCPU_FPR22+8, a0
+	kvm_restore_msa_upper	23, VCPU_FPR23+8, a0
+	kvm_restore_msa_upper	24, VCPU_FPR24+8, a0
+	kvm_restore_msa_upper	25, VCPU_FPR25+8, a0
+	kvm_restore_msa_upper	26, VCPU_FPR26+8, a0
+	kvm_restore_msa_upper	27, VCPU_FPR27+8, a0
+	kvm_restore_msa_upper	28, VCPU_FPR28+8, a0
+	kvm_restore_msa_upper	29, VCPU_FPR29+8, a0
+	kvm_restore_msa_upper	30, VCPU_FPR30+8, a0
+	kvm_restore_msa_upper	31, VCPU_FPR31+8, a0
+	jr	ra
+	 nop
+	END(__kvm_restore_msa_upper)
+
+LEAF(__kvm_restore_msacsr)
+	lw	t0, VCPU_MSA_CSR(a0)
+	/*
+	 * The ctcmsa must stay at this offset in __kvm_restore_msacsr.
+	 * See kvm_mips_csr_die_notify() which handles t0 containing a value
+	 * which triggers an MSA FP Exception, which must be stepped over and
+	 * ignored since the set cause bits must remain there for the guest.
+	 */
+	_ctcmsa	MSA_CSR, t0
+	jr	ra
+	 nop
+	END(__kvm_restore_msacsr)
diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c
index a74d602..888bb67 100644
--- a/arch/mips/kvm/stats.c
+++ b/arch/mips/kvm/stats.c
@@ -25,6 +25,10 @@
 	"System Call",
 	"Reserved Inst",
 	"Break Inst",
+	"Trap Inst",
+	"MSA FPE",
+	"FPE",
+	"MSA Disabled",
 	"D-Cache Flushes",
 };
 
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index b6beb0e..aed0ac2 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -733,6 +733,9 @@
 		}
 	}
 
+	/* restore guest state to registers */
+	kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
 	local_irq_restore(flags);
 
 }
@@ -751,6 +754,9 @@
 	vcpu->arch.preempt_entryhi = read_c0_entryhi();
 	vcpu->arch.last_sched_cpu = cpu;
 
+	/* save guest state in registers */
+	kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
 	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
 	     ASID_VERSION_MASK)) {
 		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index fd7257b..d836ed5 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -39,16 +39,30 @@
 
 static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 {
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_run *run = vcpu->run;
 	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
 	unsigned long cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
-	if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1)
-		er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
-	else
+	if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+		/* FPU Unusable */
+		if (!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+		    (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) {
+			/*
+			 * Unusable/no FPU in guest:
+			 * deliver guest COP1 Unusable Exception
+			 */
+			er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
+		} else {
+			/* Restore FPU state */
+			kvm_own_fpu(vcpu);
+			er = EMULATE_DONE;
+		}
+	} else {
 		er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
+	}
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -330,6 +344,107 @@
 	return ret;
 }
 
+static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu);
+	if (er == EMULATE_DONE) {
+		ret = RESUME_GUEST;
+	} else {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+	}
+	return ret;
+}
+
+/**
+ * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use MSA when it is disabled.
+ */
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+	    (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) {
+		/*
+		 * No MSA in guest, or FPU enabled and not in FR=1 mode,
+		 * guest reserved instruction exception
+		 */
+		er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+	} else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) {
+		/* MSA disabled by guest, guest MSA disabled exception */
+		er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
+	} else {
+		/* Restore MSA/FPU state */
+		kvm_own_msa(vcpu);
+		er = EMULATE_DONE;
+	}
+
+	switch (er) {
+	case EMULATE_DONE:
+		ret = RESUME_GUEST;
+		break;
+
+	case EMULATE_FAIL:
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+		break;
+
+	default:
+		BUG();
+	}
+	return ret;
+}
+
 static int kvm_trap_emul_vm_init(struct kvm *kvm)
 {
 	return 0;
@@ -351,8 +466,9 @@
 	 * guest will come up as expected, for now we simulate a MIPS 24kc
 	 */
 	kvm_write_c0_guest_prid(cop0, 0x00019300);
-	kvm_write_c0_guest_config(cop0,
-				  MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+	/* Have config1, Cacheable, noncoherent, write-back, write allocate */
+	kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
+				  (0x1 << CP0C0_AR) |
 				  (MMU_TYPE_R4000 << CP0C0_MT));
 
 	/* Read the cache characteristics from the host Config1 Register */
@@ -368,10 +484,18 @@
 	      (1 << CP0C1_WR) | (1 << CP0C1_CA));
 	kvm_write_c0_guest_config1(cop0, config1);
 
-	kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2);
-	/* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */
-	kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) |
-					 (1 << CP0C3_ULRI));
+	/* Have config3, no tertiary/secondary caches implemented */
+	kvm_write_c0_guest_config2(cop0, MIPS_CONF_M);
+	/* MIPS_CONF_M | (read_c0_config2() & 0xfff) */
+
+	/* Have config4, UserLocal */
+	kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI);
+
+	/* Have config5 */
+	kvm_write_c0_guest_config4(cop0, MIPS_CONF_M);
+
+	/* No config6 */
+	kvm_write_c0_guest_config5(cop0, 0);
 
 	/* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
 	kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
@@ -416,6 +540,7 @@
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	int ret = 0;
+	unsigned int cur, change;
 
 	switch (reg->id) {
 	case KVM_REG_MIPS_CP0_COUNT:
@@ -444,6 +569,44 @@
 			kvm_write_c0_guest_cause(cop0, v);
 		}
 		break;
+	case KVM_REG_MIPS_CP0_CONFIG:
+		/* read-only for now */
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG1:
+		cur = kvm_read_c0_guest_config1(cop0);
+		change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config1(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG2:
+		/* read-only for now */
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG3:
+		cur = kvm_read_c0_guest_config3(cop0);
+		change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config3(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG4:
+		cur = kvm_read_c0_guest_config4(cop0);
+		change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config4(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_CP0_CONFIG5:
+		cur = kvm_read_c0_guest_config5(cop0);
+		change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu);
+		if (change) {
+			v = cur ^ change;
+			kvm_write_c0_guest_config5(cop0, v);
+		}
+		break;
 	case KVM_REG_MIPS_COUNT_CTL:
 		ret = kvm_mips_set_count_ctl(vcpu, v);
 		break;
@@ -459,6 +622,18 @@
 	return ret;
 }
 
+static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
+{
+	kvm_lose_fpu(vcpu);
+
+	return 0;
+}
+
+static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	/* exit handlers */
 	.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -470,6 +645,10 @@
 	.handle_syscall = kvm_trap_emul_handle_syscall,
 	.handle_res_inst = kvm_trap_emul_handle_res_inst,
 	.handle_break = kvm_trap_emul_handle_break,
+	.handle_trap = kvm_trap_emul_handle_trap,
+	.handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
+	.handle_fpe = kvm_trap_emul_handle_fpe,
+	.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
 
 	.vm_init = kvm_trap_emul_vm_init,
 	.vcpu_init = kvm_trap_emul_vcpu_init,
@@ -483,6 +662,8 @@
 	.irq_clear = kvm_mips_irq_clear_cb,
 	.get_one_reg = kvm_trap_emul_get_one_reg,
 	.set_one_reg = kvm_trap_emul_set_one_reg,
+	.vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
+	.vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
 };
 
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 3b7f65c..cf9b4633 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -75,11 +75,11 @@
 int proc_dolasatrtc(struct ctl_table *table, int write,
 		       void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 	int r;
 
 	if (!write) {
-		read_persistent_clock(&ts);
+		read_persistent_clock64(&ts);
 		rtctmp = ts.tv_sec;
 		/* check for time < 0 and set to 0 */
 		if (rtctmp < 0)
diff --git a/arch/mips/loongson/loongson-3/hpet.c b/arch/mips/loongson/loongson-3/hpet.c
index e898d68..5c21cd3 100644
--- a/arch/mips/loongson/loongson-3/hpet.c
+++ b/arch/mips/loongson/loongson-3/hpet.c
@@ -162,7 +162,7 @@
 
 static struct irqaction hpet_irq = {
 	.handler = hpet_irq_handler,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
+	.flags = IRQF_NOBALANCING | IRQF_TIMER,
 	.name = "hpet",
 };
 
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 1bf60b1..8bb13a4 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -94,27 +94,29 @@
 	pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset);
 	bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
 				&resources);
-	if (!bus)
-		pci_free_resource_list(&resources);
-
 	hose->bus = bus;
 
 	need_domain_info = need_domain_info || hose->index;
 	hose->need_domain_info = need_domain_info;
-	if (bus) {
-		next_busno = bus->busn_res.end + 1;
-		/* Don't allow 8-bit bus number overflow inside the hose -
-		   reserve some space for bridges. */
-		if (next_busno > 224) {
-			next_busno = 0;
-			need_domain_info = 1;
-		}
 
-		if (!pci_has_flag(PCI_PROBE_ONLY)) {
-			pci_bus_size_bridges(bus);
-			pci_bus_assign_resources(bus);
-		}
+	if (!bus) {
+		pci_free_resource_list(&resources);
+		return;
 	}
+
+	next_busno = bus->busn_res.end + 1;
+	/* Don't allow 8-bit bus number overflow inside the hose -
+	   reserve some space for bridges. */
+	if (next_busno > 224) {
+		next_busno = 0;
+		need_domain_info = 1;
+	}
+
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+	}
+	pci_bus_add_devices(bus);
 }
 
 #ifdef CONFIG_OF
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
index 613ca1e..3dfe2d3 100644
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -342,6 +342,7 @@
 {
 	resource_size_t io_offset, mem_offset;
 	LIST_HEAD(resources);
+	struct pci_bus *bus;
 
 	ioport_resource.start	= 0xA0000000;
 	ioport_resource.end	= 0xDFFFFFFF;
@@ -371,11 +372,14 @@
 
 	pci_add_resource_offset(&resources, &pci_ioport_resource, io_offset);
 	pci_add_resource_offset(&resources, &pci_iomem_resource, mem_offset);
-	pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL, &resources);
+	bus = pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL, &resources);
+	if (!bus)
+		return 0;
 
 	pcibios_irq_init();
 	pcibios_fixup_irqs();
 	pcibios_resource_survey();
+	pci_bus_add_devices(bus);
 	return 0;
 }
 
diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h
index 1f26657..a16e55c 100644
--- a/arch/nios2/include/asm/thread_info.h
+++ b/arch/nios2/include/asm/thread_info.h
@@ -47,7 +47,6 @@
 						  0-0x7FFFFFFF for user-thead
 						  0-0xFFFFFFFF for kernel-thread
 						*/
-	struct restart_block	restart_block;
 	struct pt_regs		*regs;
 };
 
@@ -64,9 +63,6 @@
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
-	.restart_block	= {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h
index 71a3305..eff00e67 100644
--- a/arch/nios2/include/uapi/asm/ptrace.h
+++ b/arch/nios2/include/uapi/asm/ptrace.h
@@ -60,12 +60,17 @@
 #define PTR_IPENDING	37
 #define PTR_CPUID	38
 #define PTR_CTL6	39
-#define PTR_CTL7	40
+#define PTR_EXCEPTION	40
 #define PTR_PTEADDR	41
 #define PTR_TLBACC	42
 #define PTR_TLBMISC	43
+#define PTR_ECCINJ	44
+#define PTR_BADADDR	45
+#define PTR_CONFIG	46
+#define PTR_MPUBASE	47
+#define PTR_MPUACC	48
 
-#define NUM_PTRACE_REG (PTR_TLBMISC + 1)
+#define NUM_PTRACE_REG (PTR_MPUACC + 1)
 
 /* User structures for general purpose registers.  */
 struct user_pt_regs {
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
index 7729bd3..27b006c 100644
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -161,7 +161,7 @@
  ***********************************************************************
  */
 ENTRY(handle_trap)
-	ldw	r24, -4(ea)	/* instruction that caused the exception */
+	ldwio	r24, -4(ea)	/* instruction that caused the exception */
 	srli	r24, r24, 4
 	andi	r24, r24, 0x7c
 	movia	r9,trap_table
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index dda41e4..20662b0 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -43,7 +43,7 @@
 	int err;
 
 	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+	current->restart_block.fn = do_no_restart_syscall;
 
 	err = __get_user(temp, &uc->uc_mcontext.version);
 	if (temp != MCONTEXT_VERSION)
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 2ae482b4..7966429 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -23,9 +23,6 @@
 	end += (cpuinfo.dcache_line_size - 1);
 	end &= ~(cpuinfo.dcache_line_size - 1);
 
-	if (end > start + cpuinfo.dcache_size)
-		end = start + cpuinfo.dcache_size;
-
 	for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
 		__asm__ __volatile__ ("   flushda 0(%0)\n"
 					: /* Outputs */
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f..6249cdc 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -34,7 +34,7 @@
 #include <asm/kvm_para.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #define MAX_CPU     32
 #define MAX_SRC     256
@@ -289,11 +289,6 @@
 	clear_bit(n_IRQ, q->queue);
 }
 
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
-	return test_bit(n_IRQ, q->queue);
-}
-
 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
 {
 	int irq = -1;
@@ -1374,8 +1369,9 @@
 	return -ENXIO;
 }
 
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
-			 int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+			 struct kvm_io_device *this,
+			 gpa_t addr, int len, void *ptr)
 {
 	struct openpic *opp = container_of(this, struct openpic, mmio);
 	int ret;
@@ -1415,8 +1411,9 @@
 	return ret;
 }
 
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
-			  int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+			  struct kvm_io_device *this,
+			  gpa_t addr, int len, const void *ptr)
 {
 	struct openpic *opp = container_of(this, struct openpic, mmio);
 	int ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 27c0fac..24bfe40 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -807,7 +807,7 @@
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+	ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
 			      bytes, &run->mmio.data);
 
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -880,7 +880,7 @@
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
 			       bytes, &run->mmio.data);
 
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0509bca..fcbe899 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -9,11 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/jump_label.h>
 #include <asm/ppc_asm.h>
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/opal.h>
-#include <asm/jump_label.h>
 
 	.section	".text"
 
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ccd53f9..74b5b8e 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/jump_label.h>
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/jump_label.h>
 
 	.section	".text"
 	
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b5682fd..b7a67e3 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,7 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/console.h>
 #include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 58642fd..2b77e23 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_S390_JUMP_LABEL_H
 #define _ASM_S390_JUMP_LABEL_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 6
@@ -39,4 +41,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f407bbf..d01fc58 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -172,7 +172,9 @@
 	__u32	fac;			/* 0x01a0 */
 	__u8	reserved1a4[20];	/* 0x01a4 */
 	__u64	cbrlo;			/* 0x01b8 */
-	__u8	reserved1c0[30];	/* 0x01c0 */
+	__u8	reserved1c0[8];		/* 0x01c0 */
+	__u32	ecd;			/* 0x01c8 */
+	__u8	reserved1cc[18];	/* 0x01cc */
 	__u64	pp;			/* 0x01de */
 	__u8	reserved1e6[2];		/* 0x01e6 */
 	__u64	itdba;			/* 0x01e8 */
@@ -183,11 +185,17 @@
 	__u8	data[256];
 } __packed;
 
+struct kvm_s390_vregs {
+	__vector128 vrs[32];
+	__u8	reserved200[512];	/* for future vector expansion */
+} __packed;
+
 struct sie_page {
 	struct kvm_s390_sie_block sie_block;
 	__u8 reserved200[1024];		/* 0x0200 */
 	struct kvm_s390_itdb itdb;	/* 0x0600 */
-	__u8 reserved700[2304];		/* 0x0700 */
+	__u8 reserved700[1280];		/* 0x0700 */
+	struct kvm_s390_vregs vregs;	/* 0x0c00 */
 } __packed;
 
 struct kvm_vcpu_stat {
@@ -238,6 +246,7 @@
 	u32 instruction_sigp_stop;
 	u32 instruction_sigp_stop_store_status;
 	u32 instruction_sigp_store_status;
+	u32 instruction_sigp_store_adtl_status;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
@@ -270,6 +279,7 @@
 #define PGM_SPECIAL_OPERATION		0x13
 #define PGM_OPERAND			0x15
 #define PGM_TRACE_TABEL			0x16
+#define PGM_VECTOR_PROCESSING		0x1b
 #define PGM_SPACE_SWITCH		0x1c
 #define PGM_HFP_SQUARE_ROOT		0x1d
 #define PGM_PC_TRANSLATION_SPEC		0x1f
@@ -334,6 +344,11 @@
 	IRQ_PEND_COUNT
 };
 
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
 /*
  * Repressible (non-floating) machine check interrupts
  * subclass bits in MCIC
@@ -411,13 +426,32 @@
 	unsigned long pending_irqs;
 };
 
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
 struct kvm_s390_float_interrupt {
+	unsigned long pending_irqs;
 	spinlock_t lock;
-	struct list_head list;
-	atomic_t active;
+	struct list_head lists[FIRQ_LIST_COUNT];
+	int counters[FIRQ_MAX_COUNT];
+	struct kvm_s390_mchk_info mchk;
+	struct kvm_s390_ext_info srv_signal;
 	int next_rr_cpu;
 	unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
-	unsigned int irq_count;
 };
 
 struct kvm_hw_wp_info_arch {
@@ -465,6 +499,7 @@
 	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
 	s390_fp_regs      guest_fpregs;
+	struct kvm_s390_vregs	*host_vregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -553,6 +588,7 @@
 	int use_cmma;
 	int user_cpu_state_ctrl;
 	int user_sigp;
+	int user_stsi;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9c77e60..ef1a5fc 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -150,6 +150,7 @@
 #define KVM_SYNC_CRS    (1UL << 3)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -164,6 +165,9 @@
 	__u64 pft;	/* pfault token [PFAULT] */
 	__u64 pfs;	/* pfault select [PFAULT] */
 	__u64 pfc;	/* pfault compare [PFAULT] */
+	__u64 vrs[32][2];	/* vector registers */
+	__u8  reserved[512];	/* for future vector expansion */
+	__u32 fpc;	/* only valid with vector registers */
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index d4096fd..ee69c08 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -230,7 +230,7 @@
  * and returns a key, which can be used to find a mnemonic name
  * of the instruction in the icpt_insn_codes table.
  */
-#define icpt_insn_decoder(insn)			\
+#define icpt_insn_decoder(insn) (		\
 	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
 	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
 	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
@@ -239,6 +239,6 @@
 	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
 	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
 	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
-	INSN_DECODE(insn)
+	INSN_DECODE(insn))
 
 #endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e07e9160..8dc4db1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -171,6 +171,7 @@
 #else /* CONFIG_32BIT */
 	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
 	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+	DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
 	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dd..170ddd2 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@
 {
 	u64 nsecps;
 
-	if (tk->tkr.clock != &clocksource_tod)
+	if (tk->tkr_mono.clock != &clocksource_tod)
 		return;
 
 	/* Make userspace gettimeofday spin until we're done. */
 	++vdso_data->tb_update_count;
 	smp_wmb();
-	vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+	vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
 	vdso_data->xtime_clock_sec = tk->xtime_sec;
-	vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
 	vdso_data->wtom_clock_sec =
 		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-	nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+	vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+	nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
 	while (vdso_data->wtom_clock_nsec >= nsecps) {
 		vdso_data->wtom_clock_nsec -= nsecps;
 		vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@
 
 	vdso_data->xtime_coarse_sec = tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec =
-		(long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+		(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 	vdso_data->wtom_coarse_sec =
 		vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@
 		vdso_data->wtom_coarse_sec++;
 	}
 
-	vdso_data->tk_mult = tk->tkr.mult;
-	vdso_data->tk_shift = tk->tkr.shift;
+	vdso_data->tk_mult = tk->tkr_mono.mult;
+	vdso_data->tk_shift = tk->tkr_mono.shift;
 	smp_wmb();
 	++vdso_data->tb_update_count;
 }
@@ -283,7 +283,7 @@
 	if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
 		panic("Couldn't request external interrupt 0x1406");
 
-	if (clocksource_register(&clocksource_tod) != 0)
+	if (__clocksource_register(&clocksource_tod) != 0)
 		panic("Could not register TOD clock source");
 
 	/* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 9254aff..fc7ec95 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@
 
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -213,7 +213,7 @@
 	 * - gpr 3 contains the virtqueue index (passed as datamatch)
 	 * - gpr 4 contains the index on the bus (optionally)
 	 */
-	ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+	ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
 				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
 				      8, &vcpu->run->s.regs.gprs[3],
 				      vcpu->run->s.regs.gprs[4]);
@@ -230,7 +230,7 @@
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-	int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+	int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 267523c..a7559f7 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include <asm/switch_to.h>
 
 union asce {
 	unsigned long val;
@@ -207,6 +208,54 @@
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
+union alet {
+	u32 val;
+	struct {
+		u32 reserved : 7;
+		u32 p        : 1;
+		u32 alesn    : 8;
+		u32 alen     : 16;
+	};
+};
+
+union ald {
+	u32 val;
+	struct {
+		u32     : 1;
+		u32 alo : 24;
+		u32 all : 7;
+	};
+};
+
+struct ale {
+	unsigned long i      : 1; /* ALEN-Invalid Bit */
+	unsigned long        : 5;
+	unsigned long fo     : 1; /* Fetch-Only Bit */
+	unsigned long p      : 1; /* Private Bit */
+	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+	unsigned long        : 32;
+	unsigned long        : 1;
+	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+	unsigned long        : 6;
+	unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {
+	unsigned long i      : 1; /* ASX-Invalid Bit */
+	unsigned long ato    : 29; /* Authority-Table Origin */
+	unsigned long        : 1;
+	unsigned long b      : 1; /* Base-Space Bit */
+	unsigned long ax     : 16; /* Authorization Index */
+	unsigned long atl    : 12; /* Authority-Table Length */
+	unsigned long        : 2;
+	unsigned long ca     : 1; /* Controlled-ASN Bit */
+	unsigned long ra     : 1; /* Reusable-ASN Bit */
+	unsigned long asce   : 64; /* Address-Space-Control Element */
+	unsigned long ald    : 32;
+	unsigned long astesn : 32;
+	/* .. more fields there */
+} __packed;
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -307,15 +356,157 @@
 		ipte_unlock_simple(vcpu);
 }
 
-static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+			  int write)
 {
+	union alet alet;
+	struct ale ale;
+	struct aste aste;
+	unsigned long ald_addr, authority_table_addr;
+	union ald ald;
+	int eax, rc;
+	u8 authority_table;
+
+	if (ar >= NUM_ACRS)
+		return -EINVAL;
+
+	save_access_regs(vcpu->run->s.regs.acrs);
+	alet.val = vcpu->run->s.regs.acrs[ar];
+
+	if (ar == 0 || alet.val == 0) {
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	} else if (alet.val == 1) {
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	}
+
+	if (alet.reserved)
+		return PGM_ALET_SPECIFICATION;
+
+	if (alet.p)
+		ald_addr = vcpu->arch.sie_block->gcr[5];
+	else
+		ald_addr = vcpu->arch.sie_block->gcr[2];
+	ald_addr &= 0x7fffffc0;
+
+	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+	if (rc)
+		return rc;
+
+	if (alet.alen / 8 > ald.all)
+		return PGM_ALEN_TRANSLATION;
+
+	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+		return PGM_ADDRESSING;
+
+	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+			     sizeof(struct ale));
+	if (rc)
+		return rc;
+
+	if (ale.i == 1)
+		return PGM_ALEN_TRANSLATION;
+	if (ale.alesn != alet.alesn)
+		return PGM_ALE_SEQUENCE;
+
+	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+	if (rc)
+		return rc;
+
+	if (aste.i)
+		return PGM_ASTE_VALIDITY;
+	if (aste.astesn != ale.astesn)
+		return PGM_ASTE_SEQUENCE;
+
+	if (ale.p == 1) {
+		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+		if (ale.aleax != eax) {
+			if (eax / 16 > aste.atl)
+				return PGM_EXTENDED_AUTHORITY;
+
+			authority_table_addr = aste.ato * 4 + eax / 4;
+
+			rc = read_guest_real(vcpu, authority_table_addr,
+					     &authority_table,
+					     sizeof(u8));
+			if (rc)
+				return rc;
+
+			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+				return PGM_EXTENDED_AUTHORITY;
+		}
+	}
+
+	if (ale.fo == 1 && write)
+		return PGM_PROTECTION;
+
+	asce->val = aste.asce;
+	return 0;
+}
+
+struct trans_exc_code_bits {
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 6;
+	unsigned long b60  : 1;
+	unsigned long b61  : 1;
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+	FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+			 ar_t ar, int write)
+{
+	int rc;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	struct trans_exc_code_bits *tec_bits;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec_bits->as = psw_bits(*psw).as;
+
+	if (!psw_bits(*psw).t) {
+		asce->val = 0;
+		asce->r = 1;
+		return 0;
+	}
+
 	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
 	case PSW_AS_PRIMARY:
-		return vcpu->arch.sie_block->gcr[1];
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
 	case PSW_AS_SECONDARY:
-		return vcpu->arch.sie_block->gcr[7];
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
 	case PSW_AS_HOME:
-		return vcpu->arch.sie_block->gcr[13];
+		asce->val = vcpu->arch.sie_block->gcr[13];
+		return 0;
+	case PSW_AS_ACCREG:
+		rc = ar_translation(vcpu, asce, ar, write);
+		switch (rc) {
+		case PGM_ALEN_TRANSLATION:
+		case PGM_ALE_SEQUENCE:
+		case PGM_ASTE_VALIDITY:
+		case PGM_ASTE_SEQUENCE:
+		case PGM_EXTENDED_AUTHORITY:
+			vcpu->arch.pgm.exc_access_id = ar;
+			break;
+		case PGM_PROTECTION:
+			tec_bits->b60 = 1;
+			tec_bits->b61 = 1;
+			break;
+		}
+		if (rc > 0)
+			pgm->code = rc;
+		return rc;
 	}
 	return 0;
 }
@@ -330,10 +521,11 @@
  * @vcpu: virtual cpu
  * @gva: guest virtual address
  * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
  * @write: indicates if access is a write access
  *
  * Translate a guest virtual address into a guest absolute address by means
- * of dynamic address translation as specified by the architecuture.
+ * of dynamic address translation as specified by the architecture.
  * If the resulting absolute address is not available in the configuration
  * an addressing exception is indicated and @gpa will not be changed.
  *
@@ -345,7 +537,8 @@
  *	      by the architecture
  */
 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
-				     unsigned long *gpa, int write)
+				     unsigned long *gpa, const union asce asce,
+				     int write)
 {
 	union vaddress vaddr = {.addr = gva};
 	union raddress raddr = {.addr = gva};
@@ -354,12 +547,10 @@
 	union ctlreg0 ctlreg0;
 	unsigned long ptr;
 	int edat1, edat2;
-	union asce asce;
 
 	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
 	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
 	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
-	asce.val = get_vcpu_asce(vcpu);
 	if (asce.r)
 		goto real_address;
 	ptr = asce.origin * 4096;
@@ -506,48 +697,30 @@
 	return (ga & ~0x11fful) == 0;
 }
 
-static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+					  const union asce asce)
 {
 	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	union asce asce;
 
 	if (!ctlreg0.lap)
 		return 0;
-	asce.val = get_vcpu_asce(vcpu);
 	if (psw_bits(*psw).t && asce.p)
 		return 0;
 	return 1;
 }
 
-struct trans_exc_code_bits {
-	unsigned long addr : 52; /* Translation-exception Address */
-	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
-	unsigned long	   : 7;
-	unsigned long b61  : 1;
-	unsigned long as   : 2;  /* ASCE Identifier */
-};
-
-enum {
-	FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
-	FSI_STORE   = 1, /* Exception was due to store operation */
-	FSI_FETCH   = 2  /* Exception was due to fetch operation */
-};
-
 static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
 			    unsigned long *pages, unsigned long nr_pages,
-			    int write)
+			    const union asce asce, int write)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	struct trans_exc_code_bits *tec_bits;
 	int lap_enabled, rc;
 
-	memset(pgm, 0, sizeof(*pgm));
 	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
-	tec_bits->as = psw_bits(*psw).as;
-	lap_enabled = low_address_protection_enabled(vcpu);
+	lap_enabled = low_address_protection_enabled(vcpu, asce);
 	while (nr_pages) {
 		ga = kvm_s390_logical_to_effective(vcpu, ga);
 		tec_bits->addr = ga >> PAGE_SHIFT;
@@ -557,7 +730,7 @@
 		}
 		ga &= PAGE_MASK;
 		if (psw_bits(*psw).t) {
-			rc = guest_translate(vcpu, ga, pages, write);
+			rc = guest_translate(vcpu, ga, pages, asce, write);
 			if (rc < 0)
 				return rc;
 			if (rc == PGM_PROTECTION)
@@ -578,7 +751,7 @@
 	return 0;
 }
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		 unsigned long len, int write)
 {
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -591,20 +764,19 @@
 
 	if (!len)
 		return 0;
-	/* Access register mode is not supported yet. */
-	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-		return -EOPNOTSUPP;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
+	if (rc)
+		return rc;
 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
 	pages = pages_array;
 	if (nr_pages > ARRAY_SIZE(pages_array))
 		pages = vmalloc(nr_pages * sizeof(unsigned long));
 	if (!pages)
 		return -ENOMEM;
-	asce.val = get_vcpu_asce(vcpu);
 	need_ipte_lock = psw_bits(*psw).t && !asce.r;
 	if (need_ipte_lock)
 		ipte_lock(vcpu);
-	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+	rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
 	for (idx = 0; idx < nr_pages && !rc; idx++) {
 		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
 		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -652,7 +824,7 @@
  * Note: The IPTE lock is not taken during this function, so the caller
  * has to take care of this.
  */
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
 			    unsigned long *gpa, int write)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
@@ -661,26 +833,21 @@
 	union asce asce;
 	int rc;
 
-	/* Access register mode is not supported yet. */
-	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-		return -EOPNOTSUPP;
-
 	gva = kvm_s390_logical_to_effective(vcpu, gva);
-	memset(pgm, 0, sizeof(*pgm));
 	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec->as = psw_bits(*psw).as;
-	tec->fsi = write ? FSI_STORE : FSI_FETCH;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
 	tec->addr = gva >> PAGE_SHIFT;
-	if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+	if (rc)
+		return rc;
+	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
 		if (write) {
 			rc = pgm->code = PGM_PROTECTION;
 			return rc;
 		}
 	}
 
-	asce.val = get_vcpu_asce(vcpu);
 	if (psw_bits(*psw).t && !asce.r) {	/* Use DAT? */
-		rc = guest_translate(vcpu, gva, gpa, write);
+		rc = guest_translate(vcpu, gva, gpa, asce, write);
 		if (rc > 0) {
 			if (rc == PGM_PROTECTION)
 				tec->b61 = 1;
@@ -697,28 +864,51 @@
 }
 
 /**
- * kvm_s390_check_low_addr_protection - check for low-address protection
- * @ga: Guest address
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write)
+{
+	unsigned long gpa;
+	unsigned long currlen;
+	int rc = 0;
+
+	ipte_lock(vcpu);
+	while (length > 0 && !rc) {
+		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+		rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+		gva += currlen;
+		length -= currlen;
+	}
+	ipte_unlock(vcpu);
+
+	return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
  *
  * Checks whether an address is subject to low-address protection and set
  * up vcpu->arch.pgm accordingly if necessary.
  *
  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  */
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	struct trans_exc_code_bits *tec_bits;
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 
-	if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+	if (!ctlreg0.lap || !is_low_address(gra))
 		return 0;
 
 	memset(pgm, 0, sizeof(*pgm));
 	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
 	tec_bits->fsi = FSI_STORE;
 	tec_bits->as = psw_bits(*psw).as;
-	tec_bits->addr = ga >> PAGE_SHIFT;
+	tec_bits->addr = gra >> PAGE_SHIFT;
 	pgm->code = PGM_PROTECTION;
 
 	return pgm->code;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 0149cf1..ef03726 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -156,9 +156,11 @@
 }
 
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
-			    unsigned long *gpa, int write);
+			    ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write);
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		 unsigned long len, int write);
 
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
@@ -168,6 +170,7 @@
  * write_guest - copy data from kernel space to guest space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: source address in kernel space
  * @len: number of bytes to copy
  *
@@ -176,8 +179,7 @@
  * If DAT is off data will be copied to guest real or absolute memory.
  * If DAT is on data will be copied to the address space as specified by
  * the address space bits of the PSW:
- * Primary, secondory or home space (access register mode is currently not
- * implemented).
+ * Primary, secondary, home space or access register mode.
  * The addressing mode of the PSW is also inspected, so that address wrap
  * around is taken into account for 24-, 31- and 64-bit addressing mode,
  * if the to be copied data crosses page boundaries in guest address space.
@@ -210,16 +212,17 @@
  *	 if data has been changed in guest space in case of an exception.
  */
 static inline __must_check
-int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		unsigned long len)
 {
-	return access_guest(vcpu, ga, data, len, 1);
+	return access_guest(vcpu, ga, ar, data, len, 1);
 }
 
 /**
  * read_guest - copy data from guest space to kernel space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: destination address in kernel space
  * @len: number of bytes to copy
  *
@@ -229,10 +232,10 @@
  * data will be copied from guest space to kernel space.
  */
 static inline __must_check
-int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 	       unsigned long len)
 {
-	return access_guest(vcpu, ga, data, len, 0);
+	return access_guest(vcpu, ga, ar, data, len, 0);
 }
 
 /**
@@ -330,6 +333,6 @@
 void ipte_lock(struct kvm_vcpu *vcpu);
 void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3e8d409..e97b345 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -191,8 +191,8 @@
 	if (!wp_info->old_data)
 		return -ENOMEM;
 	/* try to backup the original value */
-	ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
-			 wp_info->len);
+	ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+			     wp_info->len);
 	if (ret) {
 		kfree(wp_info->old_data);
 		wp_info->old_data = NULL;
@@ -362,8 +362,8 @@
 			continue;
 
 		/* refetch the wp data and compare it to the old value */
-		if (!read_guest(vcpu, wp_info->phys_addr, temp,
-				wp_info->len)) {
+		if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+				    wp_info->len)) {
 			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
 				kfree(temp);
 				return wp_info;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index bebd215..9e3779e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -165,6 +165,7 @@
 		pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
 		pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
 		break;
+	case PGM_VECTOR_PROCESSING:
 	case PGM_DATA:
 		pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
 		break;
@@ -319,7 +320,7 @@
 
 	/* Make sure that the source is paged-in */
 	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
-				     &srcaddr, 0);
+				     reg2, &srcaddr, 0);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -328,7 +329,7 @@
 
 	/* Make sure that the destination is paged-in */
 	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
-				     &dstaddr, 1);
+				     reg1, &dstaddr, 1);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 073b5f3..9de4726 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008,2014
+ * Copyright IBM Corp. 2008, 2015
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -17,9 +17,12 @@
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
+#include <asm/dis.h>
 #include <asm/uaccess.h>
 #include <asm/sclp.h>
+#include <asm/isc.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -32,11 +35,6 @@
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
-static int is_ioint(u64 type)
-{
-	return ((type & 0xfffe0000u) != 0xfffe0000u);
-}
-
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -72,70 +70,45 @@
 	return 1;
 }
 
-static u64 int_word_to_isc_bits(u32 int_word)
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
-	u8 isc = (int_word & 0x38000000) >> 27;
+	if (!(vcpu->arch.sie_block->ckc <
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+		return 0;
+	return ckc_interrupts_enabled(vcpu);
+}
 
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	return !psw_extint_disabled(vcpu) &&
+	       (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->cputm >> 63) &&
+	       cpu_timer_interrupts_enabled(vcpu);
+}
+
+static inline int is_ioirq(unsigned long irq_type)
+{
+	return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
+		(irq_type <= IRQ_PEND_IO_ISC_7));
+}
+
+static uint64_t isc_to_isc_bits(int isc)
+{
 	return (0x80 >> isc) << 24;
 }
 
-static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt_info *inti)
+static inline u8 int_word_to_isc(u32 int_word)
 {
-	switch (inti->type) {
-	case KVM_S390_INT_EXTERNAL_CALL:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_EMERGENCY:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_CLOCK_COMP:
-		return ckc_interrupts_enabled(vcpu);
-	case KVM_S390_INT_CPU_TIMER:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
-			return 1;
-		return 0;
-	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_INIT:
-	case KVM_S390_INT_PFAULT_DONE:
-	case KVM_S390_INT_VIRTIO:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-			return 1;
-		return 0;
-	case KVM_S390_PROGRAM_INT:
-	case KVM_S390_SIGP_STOP:
-	case KVM_S390_SIGP_SET_PREFIX:
-	case KVM_S390_RESTART:
-		return 1;
-	case KVM_S390_MCHK:
-		if (psw_mchk_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
-			return 1;
-		return 0;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (psw_ioint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[6] &
-		    int_word_to_isc_bits(inti->io.io_int_word))
-			return 1;
-		return 0;
-	default:
-		printk(KERN_WARNING "illegal interrupt type %llx\n",
-		       inti->type);
-		BUG();
-	}
-	return 0;
+	return (int_word & 0x38000000) >> 27;
+}
+
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.float_int.pending_irqs;
 }
 
 static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
@@ -143,12 +116,31 @@
 	return vcpu->arch.local_int.pending_irqs;
 }
 
-static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+				   unsigned long active_mask)
 {
-	unsigned long active_mask = pending_local_irqs(vcpu);
+	int i;
+
+	for (i = 0; i <= MAX_ISC; i++)
+		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+			active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+	return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask;
+
+	active_mask = pending_local_irqs(vcpu);
+	active_mask |= pending_floating_irqs(vcpu);
 
 	if (psw_extint_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (psw_ioint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_IO_MASK;
+	else
+		active_mask = disable_iscs(vcpu, active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
 		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
@@ -157,8 +149,13 @@
 		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
 		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
 	if (psw_mchk_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_MCHK_MASK;
+	if (!(vcpu->arch.sie_block->gcr[14] &
+	      vcpu->kvm->arch.float_int.mchk.cr14))
+		__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
 
 	/*
 	 * STOP irqs will never be actively delivered. They are triggered via
@@ -200,6 +197,16 @@
 	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+		return;
+	else if (psw_ioint_disabled(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_IO_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
 	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
@@ -226,47 +233,17 @@
 		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 }
 
-/* Set interception request for non-deliverable local interrupts */
-static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
 {
+	set_intercept_indicators_io(vcpu);
 	set_intercept_indicators_ext(vcpu);
 	set_intercept_indicators_mchk(vcpu);
 	set_intercept_indicators_stop(vcpu);
 }
 
-static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt_info *inti)
-{
-	switch (inti->type) {
-	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_DONE:
-	case KVM_S390_INT_VIRTIO:
-		if (psw_extint_disabled(vcpu))
-			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR0;
-		break;
-	case KVM_S390_MCHK:
-		if (psw_mchk_disabled(vcpu))
-			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR14;
-		break;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (psw_ioint_disabled(vcpu))
-			__set_cpuflag(vcpu, CPUSTAT_IO_INT);
-		else
-			vcpu->arch.sie_block->lctl |= LCTL_CR6;
-		break;
-	default:
-		BUG();
-	}
-}
-
 static u16 get_ilc(struct kvm_vcpu *vcpu)
 {
-	const unsigned short table[] = { 2, 4, 4, 6 };
-
 	switch (vcpu->arch.sie_block->icptcode) {
 	case ICPT_INST:
 	case ICPT_INSTPROGI:
@@ -274,7 +251,7 @@
 	case ICPT_PARTEXEC:
 	case ICPT_IOINST:
 		/* last instruction only stored for these icptcodes */
-		return table[vcpu->arch.sie_block->ipa >> 14];
+		return insn_length(vcpu->arch.sie_block->ipa >> 8);
 	case ICPT_PROGI:
 		return vcpu->arch.sie_block->pgmilc;
 	default:
@@ -350,38 +327,72 @@
 
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_mchk_info mchk;
-	int rc;
+	struct kvm_s390_mchk_info mchk = {};
+	unsigned long adtl_status_addr;
+	int deliver = 0;
+	int rc = 0;
 
+	spin_lock(&fi->lock);
 	spin_lock(&li->lock);
-	mchk = li->irq.mchk;
+	if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+	    test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+		/*
+		 * If there was an exigent machine check pending, then any
+		 * repressible machine checks that might have been pending
+		 * are indicated along with it, so always clear bits for
+		 * repressible and exigent interrupts
+		 */
+		mchk = li->irq.mchk;
+		clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+		clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+		memset(&li->irq.mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
 	/*
-	 * If there was an exigent machine check pending, then any repressible
-	 * machine checks that might have been pending are indicated along
-	 * with it, so always clear both bits
+	 * We indicate floating repressible conditions along with
+	 * other pending conditions. Channel Report Pending and Channel
+	 * Subsystem damage are the only two and and are indicated by
+	 * bits in mcic and masked in cr14.
 	 */
-	clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
-	clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
-	memset(&li->irq.mchk, 0, sizeof(mchk));
+	if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		mchk.mcic |= fi->mchk.mcic;
+		mchk.cr14 |= fi->mchk.cr14;
+		memset(&fi->mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
 	spin_unlock(&li->lock);
+	spin_unlock(&fi->lock);
 
-	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-		   mchk.mcic);
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-					 mchk.cr14, mchk.mcic);
+	if (deliver) {
+		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+			   mchk.mcic);
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_MCHK,
+						 mchk.cr14, mchk.mcic);
 
-	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-	rc |= put_guest_lc(vcpu, mchk.mcic,
-			   (u64 __user *) __LC_MCCK_CODE);
-	rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
-			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-			     &mchk.fixed_logout, sizeof(mchk.fixed_logout));
-	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc  = kvm_s390_vcpu_store_status(vcpu,
+						 KVM_S390_STORE_STATUS_PREFIXED);
+		rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+				    &adtl_status_addr,
+				    sizeof(unsigned long));
+		rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+						      adtl_status_addr);
+		rc |= put_guest_lc(vcpu, mchk.mcic,
+				   (u64 __user *) __LC_MCCK_CODE);
+		rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+				   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+				     &mchk.fixed_logout,
+				     sizeof(mchk.fixed_logout));
+		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
+	}
 	return rc ? -EFAULT : 0;
 }
 
@@ -484,7 +495,7 @@
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_pgm_info pgm_info;
-	int rc = 0;
+	int rc = 0, nullifying = false;
 	u16 ilc = get_ilc(vcpu);
 
 	spin_lock(&li->lock);
@@ -509,6 +520,8 @@
 	case PGM_LX_TRANSLATION:
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
+		nullifying = true;
+		/* fall through */
 	case PGM_SPACE_SWITCH:
 		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
@@ -521,6 +534,7 @@
 	case PGM_EXTENDED_AUTHORITY:
 		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
 				  (u8 *)__LC_EXC_ACCESS_ID);
+		nullifying = true;
 		break;
 	case PGM_ASCE_TYPE:
 	case PGM_PAGE_TRANSLATION:
@@ -534,6 +548,7 @@
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
 				   (u8 *)__LC_OP_ACCESS_ID);
+		nullifying = true;
 		break;
 	case PGM_MONITOR:
 		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
@@ -541,6 +556,7 @@
 		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
 				   (u64 *)__LC_MON_CODE);
 		break;
+	case PGM_VECTOR_PROCESSING:
 	case PGM_DATA:
 		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
 				  (u32 *)__LC_DATA_EXC_CODE);
@@ -551,6 +567,15 @@
 		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		break;
+	case PGM_STACK_FULL:
+	case PGM_STACK_EMPTY:
+	case PGM_STACK_SPECIFICATION:
+	case PGM_STACK_TYPE:
+	case PGM_STACK_OPERATION:
+	case PGM_TRACE_TABEL:
+	case PGM_CRYPTO_OPERATION:
+		nullifying = true;
+		break;
 	}
 
 	if (pgm_info.code & PGM_PER) {
@@ -564,7 +589,12 @@
 				   (u8 *) __LC_PER_ACCESS_ID);
 	}
 
+	if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+		kvm_s390_rewind_psw(vcpu, ilc);
+
 	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+				 (u64 *) __LC_LAST_BREAK);
 	rc |= put_guest_lc(vcpu, pgm_info.code,
 			   (u16 *)__LC_PGM_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -574,16 +604,27 @@
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
-					  struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_ext_info ext;
+	int rc = 0;
+
+	spin_lock(&fi->lock);
+	if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	ext = fi->srv_signal;
+	memset(&fi->srv_signal, 0, sizeof(ext));
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
 	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-		   inti->ext.ext_params);
+		   ext.ext_params);
 	vcpu->stat.deliver_service_signal++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 inti->ext.ext_params, 0);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 ext.ext_params, 0);
 
 	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
 	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
@@ -591,106 +632,146 @@
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+	rc |= put_guest_lc(vcpu, ext.ext_params,
 			   (u32 *)__LC_EXT_PARAMS);
+
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
 
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-					 KVM_S390_INT_PFAULT_DONE, 0,
-					 inti->ext.ext_params2);
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				KVM_S390_INT_PFAULT_DONE, 0,
+				inti->ext.ext_params2);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+		clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-			   (u64 *)__LC_EXT_PARAMS2);
+	if (inti) {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, PFAULT_DONE,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
 	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
-					 struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 {
-	int rc;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
 
-	VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-		   inti->ext.ext_params, inti->ext.ext_params2);
-	vcpu->stat.deliver_virtio_interrupt++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 inti->ext.ext_params,
-					 inti->ext.ext_params2);
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		VCPU_EVENT(vcpu, 4,
+			   "interrupt: virtio parm:%x,parm64:%llx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				inti->ext.ext_params,
+				inti->ext.ext_params2);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+		clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-			   (u32 *)__LC_EXT_PARAMS);
-	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-			   (u64 *)__LC_EXT_PARAMS2);
+	if (inti) {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				(u32 *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
 	return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
-				     struct kvm_s390_interrupt_info *inti)
+				     unsigned long irq_type)
 {
-	int rc;
+	struct list_head *isc_list;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int rc = 0;
 
-	VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-	vcpu->stat.deliver_io_int++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-					 ((__u32)inti->io.subchannel_id << 16) |
-						inti->io.subchannel_nr,
-					 ((__u64)inti->io.io_int_parm << 32) |
-						inti->io.io_int_word);
+	fi = &vcpu->kvm->arch.float_int;
 
-	rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-			   (u16 *)__LC_SUBCHANNEL_ID);
-	rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-			   (u16 *)__LC_SUBCHANNEL_NR);
-	rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-			   (u32 *)__LC_IO_INT_PARM);
-	rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-			   (u32 *)__LC_IO_INT_WORD);
-	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	return rc ? -EFAULT : 0;
-}
+	spin_lock(&fi->lock);
+	isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+	inti = list_first_entry_or_null(isc_list,
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+		vcpu->stat.deliver_io_int++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				((__u32)inti->io.subchannel_id << 16) |
+				inti->io.subchannel_nr,
+				((__u64)inti->io.io_int_parm << 32) |
+				inti->io.io_int_word);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+	}
+	if (list_empty(isc_list))
+		clear_bit(irq_type, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
 
-static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
-{
-	struct kvm_s390_mchk_info *mchk = &inti->mchk;
-	int rc;
+	if (inti) {
+		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+				(u16 *)__LC_SUBCHANNEL_ID);
+		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+				(u16 *)__LC_SUBCHANNEL_NR);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+				(u32 *)__LC_IO_INT_PARM);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+				(u32 *)__LC_IO_INT_WORD);
+		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		kfree(inti);
+	}
 
-	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-		   mchk->mcic);
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-					 mchk->cr14, mchk->mcic);
-
-	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-	rc |= put_guest_lc(vcpu, mchk->mcic,
-			(u64 __user *) __LC_MCCK_CODE);
-	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
-			(u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-			     &mchk->fixed_logout, sizeof(mchk->fixed_logout));
-	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	return rc ? -EFAULT : 0;
 }
 
@@ -698,6 +779,7 @@
 
 static const deliver_irq_t deliver_irq_funcs[] = {
 	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
 	[IRQ_PEND_PROG]           = __deliver_prog,
 	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
 	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
@@ -706,36 +788,11 @@
 	[IRQ_PEND_RESTART]        = __deliver_restart,
 	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
 	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+	[IRQ_PEND_EXT_SERVICE]    = __deliver_service,
+	[IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
+	[IRQ_PEND_VIRTIO]         = __deliver_virtio,
 };
 
-static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
-					   struct kvm_s390_interrupt_info *inti)
-{
-	int rc;
-
-	switch (inti->type) {
-	case KVM_S390_INT_SERVICE:
-		rc = __deliver_service(vcpu, inti);
-		break;
-	case KVM_S390_INT_PFAULT_DONE:
-		rc = __deliver_pfault_done(vcpu, inti);
-		break;
-	case KVM_S390_INT_VIRTIO:
-		rc = __deliver_virtio(vcpu, inti);
-		break;
-	case KVM_S390_MCHK:
-		rc = __deliver_mchk_floating(vcpu, inti);
-		break;
-	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		rc = __deliver_io(vcpu, inti);
-		break;
-	default:
-		BUG();
-	}
-
-	return rc;
-}
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -751,21 +808,9 @@
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
-	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct kvm_s390_interrupt_info  *inti;
 	int rc;
 
-	rc = !!deliverable_local_irqs(vcpu);
-
-	if ((!rc) && atomic_read(&fi->active)) {
-		spin_lock(&fi->lock);
-		list_for_each_entry(inti, &fi->list, list)
-			if (__interrupt_is_deliverable(vcpu, inti)) {
-				rc = 1;
-				break;
-			}
-		spin_unlock(&fi->lock);
-	}
+	rc = !!deliverable_irqs(vcpu);
 
 	if (!rc && kvm_cpu_has_pending_timer(vcpu))
 		rc = 1;
@@ -784,12 +829,7 @@
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-		return 0;
-	if (!ckc_interrupts_enabled(vcpu))
-		return 0;
-	return 1;
+	return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -884,60 +924,45 @@
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 	deliver_irq_t func;
-	int deliver;
 	int rc = 0;
 	unsigned long irq_type;
-	unsigned long deliverable_irqs;
+	unsigned long irqs;
 
 	__reset_intercept_indicators(vcpu);
 
 	/* pending ckc conditions might have been invalidated */
 	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-	if (kvm_cpu_has_pending_timer(vcpu))
+	if (ckc_irq_pending(vcpu))
 		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 
+	/* pending cpu timer conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	if (cpu_timer_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
 	do {
-		deliverable_irqs = deliverable_local_irqs(vcpu);
+		irqs = deliverable_irqs(vcpu);
 		/* bits are in the order of interrupt priority */
-		irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+		irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
 		if (irq_type == IRQ_PEND_COUNT)
 			break;
-		func = deliver_irq_funcs[irq_type];
-		if (!func) {
-			WARN_ON_ONCE(func == NULL);
-			clear_bit(irq_type, &li->pending_irqs);
-			continue;
+		if (is_ioirq(irq_type)) {
+			rc = __deliver_io(vcpu, irq_type);
+		} else {
+			func = deliver_irq_funcs[irq_type];
+			if (!func) {
+				WARN_ON_ONCE(func == NULL);
+				clear_bit(irq_type, &li->pending_irqs);
+				continue;
+			}
+			rc = func(vcpu);
 		}
-		rc = func(vcpu);
-	} while (!rc && irq_type != IRQ_PEND_COUNT);
+		if (rc)
+			break;
+	} while (!rc);
 
-	set_intercept_indicators_local(vcpu);
-
-	if (!rc && atomic_read(&fi->active)) {
-		do {
-			deliver = 0;
-			spin_lock(&fi->lock);
-			list_for_each_entry_safe(inti, n, &fi->list, list) {
-				if (__interrupt_is_deliverable(vcpu, inti)) {
-					list_del(&inti->list);
-					fi->irq_count--;
-					deliver = 1;
-					break;
-				}
-				__set_intercept_indicator(vcpu, inti);
-			}
-			if (list_empty(&fi->list))
-				atomic_set(&fi->active, 0);
-			spin_unlock(&fi->lock);
-			if (deliver) {
-				rc = __deliver_floating_interrupt(vcpu, inti);
-				kfree(inti);
-			}
-		} while (!rc && deliver);
-	}
+	set_intercept_indicators(vcpu);
 
 	return rc;
 }
@@ -1172,80 +1197,182 @@
 	return 0;
 }
 
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+						  int isc, u32 schid)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	struct kvm_s390_interrupt_info *iter;
+	u16 id = (schid & 0xffff0000U) >> 16;
+	u16 nr = schid & 0x0000ffffU;
 
+	spin_lock(&fi->lock);
+	list_for_each_entry(iter, isc_list, list) {
+		if (schid && (id != iter->io.subchannel_id ||
+			      nr != iter->io.subchannel_nr))
+			continue;
+		/* found an appropriate entry */
+		list_del_init(&iter->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+		if (list_empty(isc_list))
+			clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+		spin_unlock(&fi->lock);
+		return iter;
+	}
+	spin_unlock(&fi->lock);
+	return NULL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ */
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-						    u64 cr6, u64 schid)
+						    u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int isc;
+
+	for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+		if (isc_mask & isc_to_isc_bits(isc))
+			inti = get_io_int(kvm, isc, schid);
+	}
+	return inti;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+	/*
+	 * Early versions of the QEMU s390 bios will inject several
+	 * service interrupts after another without handling a
+	 * condition code indicating busy.
+	 * We will silently ignore those superfluous sccb values.
+	 * A future version of QEMU will take care of serialization
+	 * of servc requests
+	 */
+	if (fi->srv_signal.ext_params & SCCB_MASK)
+		goto out;
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+	set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+			    struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+	set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+				 struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_PFAULT] >=
+		(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_PFAULT] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+	set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+				struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+	fi->mchk.mcic |= inti->mchk.mcic;
+	set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *inti, *iter;
+	struct list_head *list;
+	int isc;
 
-	if ((!schid && !cr6) || (schid && cr6))
-		return NULL;
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
-	inti = NULL;
-	list_for_each_entry(iter, &fi->list, list) {
-		if (!is_ioint(iter->type))
-			continue;
-		if (cr6 &&
-		    ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
-			continue;
-		if (schid) {
-			if (((schid & 0x00000000ffff0000) >> 16) !=
-			    iter->io.subchannel_id)
-				continue;
-			if ((schid & 0x000000000000ffff) !=
-			    iter->io.subchannel_nr)
-				continue;
-		}
-		inti = iter;
-		break;
+	if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
 	}
-	if (inti) {
-		list_del_init(&inti->list);
-		fi->irq_count--;
-	}
-	if (list_empty(&fi->list))
-		atomic_set(&fi->active, 0);
+	fi->counters[FIRQ_CNTR_IO] += 1;
+
+	isc = int_word_to_isc(inti->io.io_int_word);
+	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	list_add_tail(&inti->list, list);
+	set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
-	return inti;
+	return 0;
 }
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *iter;
 	struct kvm_vcpu *dst_vcpu = NULL;
 	int sigcpu;
-	int rc = 0;
+	u64 type = READ_ONCE(inti->type);
+	int rc;
 
 	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
-		rc = -EINVAL;
-		goto unlock_fi;
-	}
-	fi->irq_count++;
-	if (!is_ioint(inti->type)) {
-		list_add_tail(&inti->list, &fi->list);
-	} else {
-		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
 
-		/* Keep I/O interrupts sorted in isc order. */
-		list_for_each_entry(iter, &fi->list, list) {
-			if (!is_ioint(iter->type))
-				continue;
-			if (int_word_to_isc_bits(iter->io.io_int_word)
-			    <= isc_bits)
-				continue;
-			break;
-		}
-		list_add_tail(&inti->list, &iter->list);
+	switch (type) {
+	case KVM_S390_MCHK:
+		rc = __inject_float_mchk(kvm, inti);
+		break;
+	case KVM_S390_INT_VIRTIO:
+		rc = __inject_virtio(kvm, inti);
+		break;
+	case KVM_S390_INT_SERVICE:
+		rc = __inject_service(kvm, inti);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		rc = __inject_pfault_done(kvm, inti);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		rc = __inject_io(kvm, inti);
+		break;
+	default:
+		rc = -EINVAL;
 	}
-	atomic_set(&fi->active, 1);
-	if (atomic_read(&kvm->online_vcpus) == 0)
-		goto unlock_fi;
+	if (rc)
+		return rc;
+
 	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
 	if (sigcpu == KVM_MAX_VCPUS) {
 		do {
@@ -1257,7 +1384,7 @@
 	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
 	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
-	switch (inti->type) {
+	switch (type) {
 	case KVM_S390_MCHK:
 		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 		break;
@@ -1270,9 +1397,8 @@
 	}
 	spin_unlock(&li->lock);
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
-unlock_fi:
-	spin_unlock(&fi->lock);
-	return rc;
+	return 0;
+
 }
 
 int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1332,10 +1458,10 @@
 	return rc;
 }
 
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
 			      struct kvm_s390_interrupt_info *inti)
 {
-	__inject_vm(kvm, inti);
+	return __inject_vm(kvm, inti);
 }
 
 int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
@@ -1388,12 +1514,10 @@
 	spin_unlock(&li->lock);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
-	spin_lock(&li->lock);
 	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
@@ -1433,83 +1557,130 @@
 	default:
 		rc = -EINVAL;
 	}
+
+	return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	spin_lock(&li->lock);
+	rc = do_inject_vcpu(vcpu, irq);
 	spin_unlock(&li->lock);
 	if (!rc)
 		kvm_s390_vcpu_wakeup(vcpu);
 	return rc;
 }
 
-void kvm_s390_clear_float_irqs(struct kvm *kvm)
+static inline void clear_irq_list(struct list_head *_list)
 {
-	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info	*n, *inti = NULL;
+	struct kvm_s390_interrupt_info *inti, *n;
 
-	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	list_for_each_entry_safe(inti, n, &fi->list, list) {
+	list_for_each_entry_safe(inti, n, _list, list) {
 		list_del(&inti->list);
 		kfree(inti);
 	}
-	fi->irq_count = 0;
-	atomic_set(&fi->active, 0);
-	spin_unlock(&fi->lock);
 }
 
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
-				   u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
-	struct kvm_s390_irq irq = {0};
-
-	irq.type = inti->type;
+	irq->type = inti->type;
 	switch (inti->type) {
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
-	case KVM_S390_INT_SERVICE:
-		irq.u.ext = inti->ext;
+		irq->u.ext = inti->ext;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		irq.u.io = inti->io;
+		irq->u.io = inti->io;
 		break;
-	case KVM_S390_MCHK:
-		irq.u.mchk = inti->mchk;
-		break;
-	default:
-		return -EINVAL;
 	}
-
-	if (copy_to_user(uptr, &irq, sizeof(irq)))
-		return -EFAULT;
-
-	return 0;
 }
 
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int i;
+
+	spin_lock(&fi->lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		clear_irq_list(&fi->lists[i]);
+	for (i = 0; i < FIRQ_MAX_COUNT; i++)
+		fi->counters[i] = 0;
+	spin_unlock(&fi->lock);
+};
+
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
 	struct kvm_s390_interrupt_info *inti;
 	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_irq *buf;
+	struct kvm_s390_irq *irq;
+	int max_irqs;
 	int ret = 0;
 	int n = 0;
+	int i;
+
+	if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+		return -EINVAL;
+
+	/*
+	 * We are already using -ENOMEM to signal
+	 * userspace it may retry with a bigger buffer,
+	 * so we need to use something else for this case
+	 */
+	buf = vzalloc(len);
+	if (!buf)
+		return -ENOBUFS;
+
+	max_irqs = len / sizeof(struct kvm_s390_irq);
 
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
-
-	list_for_each_entry(inti, &fi->list, list) {
-		if (len < sizeof(struct kvm_s390_irq)) {
+	for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+		list_for_each_entry(inti, &fi->lists[i], list) {
+			if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+			}
+			inti_to_irq(inti, &buf[n]);
+			n++;
+		}
+	}
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+		if (n == max_irqs) {
 			/* signal userspace to try again */
 			ret = -ENOMEM;
-			break;
+			goto out;
 		}
-		ret = copy_irq_to_user(inti, buf);
-		if (ret)
-			break;
-		buf += sizeof(struct kvm_s390_irq);
-		len -= sizeof(struct kvm_s390_irq);
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_INT_SERVICE;
+		irq->u.ext = fi->srv_signal;
 		n++;
 	}
+	if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+		}
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = fi->mchk;
+		n++;
+}
 
+out:
 	spin_unlock(&fi->lock);
+	if (!ret && n > 0) {
+		if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+			ret = -EFAULT;
+	}
+	vfree(buf);
 
 	return ret < 0 ? ret : n;
 }
@@ -1520,7 +1691,7 @@
 
 	switch (attr->group) {
 	case KVM_DEV_FLIC_GET_ALL_IRQS:
-		r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+		r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
 					  attr->attr);
 		break;
 	default:
@@ -1952,3 +2123,143 @@
 {
 	return -EINVAL;
 }
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq *buf;
+	int r = 0;
+	int n;
+
+	buf = vmalloc(len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user((void *) buf, irqstate, len)) {
+		r = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Don't allow setting the interrupt state
+	 * when there are already interrupts pending
+	 */
+	spin_lock(&li->lock);
+	if (li->pending_irqs) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < len / sizeof(*buf); n++) {
+		r = do_inject_vcpu(vcpu, &buf[n]);
+		if (r)
+			break;
+	}
+
+out_unlock:
+	spin_unlock(&li->lock);
+out_free:
+	vfree(buf);
+
+	return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+			    struct kvm_s390_irq *irq,
+			    unsigned long irq_type)
+{
+	switch (irq_type) {
+	case IRQ_PEND_MCHK_EX:
+	case IRQ_PEND_MCHK_REP:
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = li->irq.mchk;
+		break;
+	case IRQ_PEND_PROG:
+		irq->type = KVM_S390_PROGRAM_INT;
+		irq->u.pgm = li->irq.pgm;
+		break;
+	case IRQ_PEND_PFAULT_INIT:
+		irq->type = KVM_S390_INT_PFAULT_INIT;
+		irq->u.ext = li->irq.ext;
+		break;
+	case IRQ_PEND_EXT_EXTERNAL:
+		irq->type = KVM_S390_INT_EXTERNAL_CALL;
+		irq->u.extcall = li->irq.extcall;
+		break;
+	case IRQ_PEND_EXT_CLOCK_COMP:
+		irq->type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case IRQ_PEND_EXT_CPU_TIMER:
+		irq->type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case IRQ_PEND_SIGP_STOP:
+		irq->type = KVM_S390_SIGP_STOP;
+		irq->u.stop = li->irq.stop;
+		break;
+	case IRQ_PEND_RESTART:
+		irq->type = KVM_S390_RESTART;
+		break;
+	case IRQ_PEND_SET_PREFIX:
+		irq->type = KVM_S390_SIGP_SET_PREFIX;
+		irq->u.prefix = li->irq.prefix;
+		break;
+	}
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	unsigned long pending_irqs;
+	struct kvm_s390_irq irq;
+	unsigned long irq_type;
+	int cpuaddr;
+	int n = 0;
+
+	spin_lock(&li->lock);
+	pending_irqs = li->pending_irqs;
+	memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+	       sizeof(sigp_emerg_pending));
+	spin_unlock(&li->lock);
+
+	for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+		memset(&irq, 0, sizeof(irq));
+		if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+			continue;
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+		for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+			memset(&irq, 0, sizeof(irq));
+			if (n + sizeof(irq) > len)
+				return -ENOBUFS;
+			irq.type = KVM_S390_INT_EMERGENCY;
+			irq.u.emerg.code = cpuaddr;
+			if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+				return -EFAULT;
+			n += sizeof(irq);
+		}
+	}
+
+	if ((sigp_ctrl & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+	     CPUSTAT_ECALL_PEND)) {
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		memset(&irq, 0, sizeof(irq));
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	return n;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 19e17bd..afa2bd7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,11 +25,13 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
+#include <asm/isc.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -38,6 +40,11 @@
 #include "trace.h"
 #include "trace-s390.h"
 
+#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+			   (KVM_MAX_VCPUS + LOCAL_IRQS))
+
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -87,6 +94,7 @@
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
@@ -101,8 +109,8 @@
 
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
-	0xff82fffbf4fc2000UL,
-	0x005c000000000000UL,
+	0xffe6fffbfcfdfc40UL,
+	0x205c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
@@ -171,9 +179,16 @@
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_INJECT_IRQ:
 	case KVM_CAP_S390_USER_SIGP:
+	case KVM_CAP_S390_USER_STSI:
+	case KVM_CAP_S390_SKEYS:
+	case KVM_CAP_S390_IRQ_STATE:
 		r = 1;
 		break;
+	case KVM_CAP_S390_MEM_OP:
+		r = MEM_OP_MAX_SIZE;
+		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
@@ -184,6 +199,9 @@
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		r = MACHINE_HAS_VX;
+		break;
 	default:
 		r = 0;
 	}
@@ -264,6 +282,18 @@
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		if (MACHINE_HAS_VX) {
+			set_kvm_facility(kvm->arch.model.fac->mask, 129);
+			set_kvm_facility(kvm->arch.model.fac->list, 129);
+			r = 0;
+		} else
+			r = -EINVAL;
+		break;
+	case KVM_CAP_S390_USER_STSI:
+		kvm->arch.user_stsi = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -708,6 +738,108 @@
 	return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	unsigned long curkey;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_use_skey(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		curkey = get_guest_storage_key(current->mm, hva);
+		if (IS_ERR_VALUE(curkey)) {
+			r = curkey;
+			goto out;
+		}
+		keys[i] = curkey;
+	}
+
+	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+			 sizeof(uint8_t) * args->count);
+	if (r)
+		r = -EFAULT;
+out:
+	kvfree(keys);
+	return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+			   sizeof(uint8_t) * args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Enable storage key handling for the guest */
+	s390_enable_skey();
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		/* Lowest order bit is reserved */
+		if (keys[i] & 0x01) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		r = set_guest_storage_key(current->mm, hva,
+					  (unsigned long)keys[i], 0);
+		if (r)
+			goto out;
+	}
+out:
+	kvfree(keys);
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -767,6 +899,26 @@
 		r = kvm_s390_vm_has_attr(kvm, &attr);
 		break;
 	}
+	case KVM_S390_GET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_get_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_SET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_set_skeys(kvm, &args);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -887,7 +1039,7 @@
 
 	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
 	if (!kvm->arch.dbf)
-		goto out_nodbf;
+		goto out_err;
 
 	/*
 	 * The architectural maximum amount of facilities is 16 kbit. To store
@@ -899,7 +1051,7 @@
 	kvm->arch.model.fac =
 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
 	if (!kvm->arch.model.fac)
-		goto out_nofac;
+		goto out_err;
 
 	/* Populate the facility mask initially. */
 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
@@ -919,10 +1071,11 @@
 	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 
 	if (kvm_s390_crypto_init(kvm) < 0)
-		goto out_crypto;
+		goto out_err;
 
 	spin_lock_init(&kvm->arch.float_int.lock);
-	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
 	init_waitqueue_head(&kvm->arch.ipte_wq);
 	mutex_init(&kvm->arch.ipte_mutex);
 
@@ -934,7 +1087,7 @@
 	} else {
 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
 		if (!kvm->arch.gmap)
-			goto out_nogmap;
+			goto out_err;
 		kvm->arch.gmap->private = kvm;
 		kvm->arch.gmap->pfault_enabled = 0;
 	}
@@ -946,15 +1099,11 @@
 	spin_lock_init(&kvm->arch.start_stop_lock);
 
 	return 0;
-out_nogmap:
-	kfree(kvm->arch.crypto.crycb);
-out_crypto:
-	free_page((unsigned long)kvm->arch.model.fac);
-out_nofac:
-	debug_unregister(kvm->arch.dbf);
-out_nodbf:
-	free_page((unsigned long)(kvm->arch.sca));
 out_err:
+	kfree(kvm->arch.crypto.crycb);
+	free_page((unsigned long)kvm->arch.model.fac);
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)(kvm->arch.sca));
 	return rc;
 }
 
@@ -1034,6 +1183,8 @@
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT;
+	if (test_kvm_facility(vcpu->kvm, 129))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
 	if (kvm_is_ucontrol(vcpu->kvm))
 		return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1044,10 +1195,18 @@
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	save_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129))
+		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		save_fp_regs(vcpu->arch.host_fpregs.fprs);
 	save_access_regs(vcpu->arch.host_acrs);
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		restore_fp_ctl(&vcpu->run->s.regs.fpc);
+		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1057,11 +1216,19 @@
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		save_fp_ctl(&vcpu->run->s.regs.fpc);
+		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (test_kvm_facility(vcpu->kvm, 129))
+		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1129,6 +1296,15 @@
 	return 0;
 }
 
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+	vcpu->arch.cpu_id = model->cpu_id;
+	vcpu->arch.sie_block->ibc = model->ibc;
+	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int rc = 0;
@@ -1137,6 +1313,8 @@
 						    CPUSTAT_SM |
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_GED);
+	kvm_s390_vcpu_setup_model(vcpu);
+
 	vcpu->arch.sie_block->ecb   = 6;
 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= 0x10;
@@ -1147,8 +1325,11 @@
 		vcpu->arch.sie_block->eca |= 1;
 	if (sclp_has_sigpif())
 		vcpu->arch.sie_block->eca |= 0x10000000U;
-	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
-				      ICTL_TPROT;
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		vcpu->arch.sie_block->eca |= 0x00020000;
+		vcpu->arch.sie_block->ecd |= 0x20000000;
+	}
+	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
 	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1158,11 +1339,6 @@
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 
-	mutex_lock(&vcpu->kvm->lock);
-	vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
-	vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
-	mutex_unlock(&vcpu->kvm->lock);
-
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
 	return rc;
@@ -1190,6 +1366,7 @@
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1205,7 +1382,6 @@
 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
 	}
-	vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->list;
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1725,6 +1901,31 @@
 	return 0;
 }
 
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	u8 opcode;
+	int rc;
+
+	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+	trace_kvm_s390_sie_fault(vcpu);
+
+	/*
+	 * We want to inject an addressing exception, which is defined as a
+	 * suppressing or terminating exception. However, since we came here
+	 * by a DAT access exception, the PSW still points to the faulting
+	 * instruction since DAT exceptions are nullifying. So we've got
+	 * to look up the current opcode to get the length of the instruction
+	 * to be able to forward the PSW.
+	 */
+	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
+
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
 	int rc = -1;
@@ -1756,11 +1957,8 @@
 		}
 	}
 
-	if (rc == -1) {
-		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-		trace_kvm_s390_sie_fault(vcpu);
-		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	}
+	if (rc == -1)
+		rc = vcpu_post_run_fault_in_sie(vcpu);
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 
@@ -1976,6 +2174,35 @@
 	return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long gpa)
+{
+	/* Only bits 0-53 are used for address formation */
+	if (!(gpa & ~0x3ff))
+		return 0;
+
+	return write_guest_abs(vcpu, gpa & ~0x3ff,
+			       (void *)&vcpu->run->s.regs.vrs, 512);
+}
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!test_kvm_facility(vcpu->kvm, 129))
+		return 0;
+
+	/*
+	 * The guest VXRS are in the host VXRs due to the lazy
+	 * copying in vcpu load/put. Let's update our copies before we save
+	 * it into the save area.
+	 */
+	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -2100,6 +2327,65 @@
 	return r;
 }
 
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+				  struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+				    | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+	if (mop->flags & ~supported_flags)
+		return -EINVAL;
+
+	if (mop->size > MEM_OP_MAX_SIZE)
+		return -E2BIG;
+
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+			break;
+		}
+		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		if (r == 0) {
+			if (copy_to_user(uaddr, tmpbuf, mop->size))
+				r = -EFAULT;
+		}
+		break;
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+			break;
+		}
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			break;
+		}
+		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+	vfree(tmpbuf);
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2109,6 +2395,15 @@
 	long r;
 
 	switch (ioctl) {
+	case KVM_S390_IRQ: {
+		struct kvm_s390_irq s390irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+			break;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
 		struct kvm_s390_irq s390irq;
@@ -2199,6 +2494,47 @@
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
+	case KVM_S390_SET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+		    irq_state.len == 0 ||
+		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_set_irq_state(vcpu,
+					   (void __user *) irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_GET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len == 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_get_irq_state(vcpu,
+					   (__u8 __user *)  irq_state.buf,
+					   irq_state.len);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c34109a..ca108b9 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,16 +70,22 @@
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
-static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
-					      u64 *address1, u64 *address2)
+					      u64 *address1, u64 *address2,
+					      ar_t *ar_b1, ar_t *ar_b2)
 {
 	u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
 	u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
@@ -88,6 +94,11 @@
 
 	*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
 	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+	if (ar_b1)
+		*ar_b1 = base1;
+	if (ar_b2)
+		*ar_b2 = base2;
 }
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
@@ -98,7 +109,7 @@
 		*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
-static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
@@ -107,14 +118,20 @@
 	if (disp2 & 0x80000)
 		disp2+=0xfff00000;
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
 }
 
-static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
 	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+	if (ar)
+		*ar = base2;
+
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
@@ -125,13 +142,24 @@
 	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 
-/* test availability of facility in a kvm intance */
+/* test availability of facility in a kvm instance */
 static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
 {
 	return __test_facility(nr, kvm->arch.model.fac->mask) &&
 		__test_facility(nr, kvm->arch.model.fac->list);
 }
 
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return -EINVAL;
+	ptr = (unsigned char *) fac_list + (nr >> 3);
+	*ptr |= (0x80UL >> (nr & 7));
+	return 0;
+}
+
 /* are cpu states controlled by user space */
 static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 {
@@ -150,9 +178,9 @@
 				      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-						    u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
-			      struct kvm_s390_interrupt_info *inti);
+						    u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in intercept.c */
@@ -177,7 +205,10 @@
 /* implemented in kvm-s390.c */
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -241,6 +272,10 @@
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+			   void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+			   __u8 __user *buf, int len);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3511169..d22d8ee 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -36,15 +36,16 @@
 	struct kvm_vcpu *cpup;
 	s64 hostclk, val;
 	int i, rc;
+	ar_t ar;
 	u64 op2;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	op2 = kvm_s390_get_base_disp_s(vcpu);
+	op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (op2 & 7)	/* Operand must be on a doubleword boundary */
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, op2, &val, sizeof(val));
+	rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -68,20 +69,21 @@
 	u64 operand2;
 	u32 address;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_spx++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	/* must be word boundary */
 	if (operand2 & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* get the value */
-	rc = read_guest(vcpu, operand2, &address, sizeof(address));
+	rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -107,13 +109,14 @@
 	u64 operand2;
 	u32 address;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stpx++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	/* must be word boundary */
 	if (operand2 & 3)
@@ -122,7 +125,7 @@
 	address = kvm_s390_get_prefix(vcpu);
 
 	/* get the value */
-	rc = write_guest(vcpu, operand2, &address, sizeof(address));
+	rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -136,18 +139,19 @@
 	u16 vcpu_id = vcpu->vcpu_id;
 	u64 ga;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stap++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_s(vcpu);
+	ga = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (ga & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+	rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -207,7 +211,7 @@
 	kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
 	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
 	addr = kvm_s390_logical_to_effective(vcpu, addr);
-	if (kvm_s390_check_low_addr_protection(vcpu, addr))
+	if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
 		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	addr = kvm_s390_real_to_abs(vcpu, addr);
 
@@ -229,18 +233,20 @@
 	struct kvm_s390_interrupt_info *inti;
 	unsigned long len;
 	u32 tpi_data[3];
-	int cc, rc;
+	int rc;
 	u64 addr;
+	ar_t ar;
 
-	rc = 0;
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	cc = 0;
+
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
-	if (!inti)
-		goto no_interrupt;
-	cc = 1;
+	if (!inti) {
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
 	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
 	tpi_data[1] = inti->io.io_int_parm;
 	tpi_data[2] = inti->io.io_int_word;
@@ -250,40 +256,51 @@
 		 * provided area.
 		 */
 		len = sizeof(tpi_data) - 4;
-		rc = write_guest(vcpu, addr, &tpi_data, len);
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
+		rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+		if (rc) {
+			rc = kvm_s390_inject_prog_cond(vcpu, rc);
+			goto reinject_interrupt;
+		}
 	} else {
 		/*
 		 * Store the three-word I/O interruption code into
 		 * the appropriate lowcore area.
 		 */
 		len = sizeof(tpi_data);
-		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+			/* failed writes to the low core are not recoverable */
 			rc = -EFAULT;
+			goto reinject_interrupt;
+		}
 	}
+
+	/* irq was successfully handed to the guest */
+	kfree(inti);
+	kvm_s390_set_psw_cc(vcpu, 1);
+	return 0;
+reinject_interrupt:
 	/*
 	 * If we encounter a problem storing the interruption code, the
 	 * instruction is suppressed from the guest's view: reinject the
 	 * interrupt.
 	 */
-	if (!rc)
+	if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
 		kfree(inti);
-	else
-		kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
-	/* Set condition code and we're done. */
-	if (!rc)
-		kvm_s390_set_psw_cc(vcpu, cc);
+		rc = -EFAULT;
+	}
+	/* don't set the cc, a pgm irq was injected or we drop to user space */
 	return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_interrupt_info *inti = NULL;
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
 
-	inti = kvm_s390_get_io_int(vcpu->kvm, 0,
-				   vcpu->run->s.regs.gprs[1]);
+	/* a valid schid has at least one bit set */
+	if (vcpu->run->s.regs.gprs[1])
+		inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+					   vcpu->run->s.regs.gprs[1]);
 
 	/*
 	 * Prepare exit to userspace.
@@ -386,15 +403,16 @@
 	psw_compat_t new_psw;
 	u64 addr;
 	int rc;
+	ar_t ar;
 
 	if (gpsw->mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (!(new_psw.mask & PSW32_MASK_BASE))
@@ -412,14 +430,15 @@
 	psw_t new_psw;
 	u64 addr;
 	int rc;
+	ar_t ar;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	addr = kvm_s390_get_base_disp_s(vcpu);
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	vcpu->arch.sie_block->gpsw = new_psw;
@@ -433,18 +452,19 @@
 	u64 stidp_data = vcpu->arch.stidp_data;
 	u64 operand2;
 	int rc;
+	ar_t ar;
 
 	vcpu->stat.instruction_stidp++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (operand2 & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+	rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -467,6 +487,7 @@
 	for (n = mem->count - 1; n > 0 ; n--)
 		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
 
+	memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
 	mem->vm[0].cpus_total = cpus;
 	mem->vm[0].cpus_configured = cpus;
 	mem->vm[0].cpus_standby = 0;
@@ -478,6 +499,17 @@
 	ASCEBC(mem->vm[0].cpi, 16);
 }
 
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+				 u8 fc, u8 sel1, u16 sel2)
+{
+	vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+	vcpu->run->s390_stsi.addr = addr;
+	vcpu->run->s390_stsi.ar = ar;
+	vcpu->run->s390_stsi.fc = fc;
+	vcpu->run->s390_stsi.sel1 = sel1;
+	vcpu->run->s390_stsi.sel2 = sel2;
+}
+
 static int handle_stsi(struct kvm_vcpu *vcpu)
 {
 	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -486,6 +518,7 @@
 	unsigned long mem = 0;
 	u64 operand2;
 	int rc = 0;
+	ar_t ar;
 
 	vcpu->stat.instruction_stsi++;
 	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -508,7 +541,7 @@
 		return 0;
 	}
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
 	if (operand2 & 0xfff)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -532,16 +565,20 @@
 		break;
 	}
 
-	rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+	rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
 	if (rc) {
 		rc = kvm_s390_inject_prog_cond(vcpu, rc);
 		goto out;
 	}
+	if (vcpu->kvm->arch.user_stsi) {
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		rc = -EREMOTE;
+	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
 	kvm_s390_set_psw_cc(vcpu, 0);
 	vcpu->run->s.regs.gprs[0] = 0;
-	return 0;
+	return rc;
 out_no_data:
 	kvm_s390_set_psw_cc(vcpu, 3);
 out:
@@ -670,7 +707,7 @@
 	}
 
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_protection(vcpu, start))
+		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
 			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	}
 
@@ -776,13 +813,14 @@
 	int reg, rc, nr_regs;
 	u32 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_lctl++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rs(vcpu);
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -791,7 +829,7 @@
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
-	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
@@ -814,13 +852,14 @@
 	int reg, rc, nr_regs;
 	u32 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_stctl++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rs(vcpu);
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
 	if (ga & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -836,7 +875,7 @@
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
 	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -847,13 +886,14 @@
 	int reg, rc, nr_regs;
 	u64 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_lctlg++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rsy(vcpu);
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -862,7 +902,7 @@
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
 	nr_regs = ((reg3 - reg1) & 0xf) + 1;
-	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
@@ -884,13 +924,14 @@
 	int reg, rc, nr_regs;
 	u64 ctl_array[16];
 	u64 ga;
+	ar_t ar;
 
 	vcpu->stat.instruction_stctg++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	ga = kvm_s390_get_base_disp_rsy(vcpu);
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -906,7 +947,7 @@
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
 	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -931,13 +972,14 @@
 	unsigned long hva, gpa;
 	int ret = 0, cc = 0;
 	bool writable;
+	ar_t ar;
 
 	vcpu->stat.instruction_tprot++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
 
 	/* we only handle the Linux memory detection case:
 	 * access key == 0
@@ -946,11 +988,11 @@
 		return -EOPNOTSUPP;
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
 		ipte_lock(vcpu);
-	ret = guest_translate_address(vcpu, address1, &gpa, 1);
+	ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
 	if (ret == PGM_PROTECTION) {
 		/* Write protected? Try again with read-only... */
 		cc = 1;
-		ret = guest_translate_address(vcpu, address1, &gpa, 0);
+		ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
 	}
 	if (ret) {
 		if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 23b1e86..72e58bd 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -393,6 +393,9 @@
 	case SIGP_STORE_STATUS_AT_ADDRESS:
 		vcpu->stat.instruction_sigp_store_status++;
 		break;
+	case SIGP_STORE_ADDITIONAL_STATUS:
+		vcpu->stat.instruction_sigp_store_adtl_status++;
+		break;
 	case SIGP_SET_PREFIX:
 		vcpu->stat.instruction_sigp_prefix++;
 		break;
@@ -431,7 +434,7 @@
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	order_code = kvm_s390_get_base_disp_rs(vcpu);
+	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 	if (handle_sigp_order_in_user_space(vcpu, order_code))
 		return -EOPNOTSUPP;
 
@@ -473,7 +476,7 @@
 	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
 	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
 	struct kvm_vcpu *dest_vcpu;
-	u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 
 	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index f0b8544..b2c76f6 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -780,8 +780,8 @@
 		zpci_cleanup_bus_resources(zdev);
 		return -EIO;
 	}
-
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
+	pci_bus_add_devices(zdev->bus);
 	return 0;
 }
 
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 1bc09ee..d5462b7 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -58,20 +58,23 @@
 
 	need_domain_info = need_domain_info || hose->index;
 	hose->need_domain_info = need_domain_info;
-	if (bus) {
-		next_busno = bus->busn_res.end + 1;
-		/* Don't allow 8-bit bus number overflow inside the hose -
-		   reserve some space for bridges. */
-		if (next_busno > 224) {
-			next_busno = 0;
-			need_domain_info = 1;
-		}
 
-		pci_bus_size_bridges(bus);
-		pci_bus_assign_resources(bus);
-	} else {
+	if (!bus) {
 		pci_free_resource_list(&resources);
+		return;
 	}
+
+	next_busno = bus->busn_res.end + 1;
+	/* Don't allow 8-bit bus number overflow inside the hose -
+	   reserve some space for bridges. */
+	if (next_busno > 224) {
+		next_busno = 0;
+		need_domain_info = 1;
+	}
+
+	pci_bus_size_bridges(bus);
+	pci_bus_assign_resources(bus);
+	pci_bus_add_devices(bus);
 }
 
 /*
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index ec2e2e2..cc9b04a 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_SPARC_JUMP_LABEL_H
 #define _ASM_SPARC_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -22,8 +22,6 @@
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -32,4 +30,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 899b720..4371f72 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -34,15 +34,17 @@
 
 	root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info,
 				     &resources);
-	if (root_bus) {
-		/* Setup IRQs of all devices using custom routines */
-		pci_fixup_irqs(pci_common_swizzle, info->map_irq);
-
-		/* Assign devices with resources */
-		pci_assign_unassigned_resources();
-	} else {
+	if (!root_bus) {
 		pci_free_resource_list(&resources);
+		return;
 	}
+
+	/* Setup IRQs of all devices using custom routines */
+	pci_fixup_irqs(pci_common_swizzle, info->map_irq);
+
+	/* Assign devices with resources */
+	pci_assign_unassigned_resources();
+	pci_bus_add_devices(root_bus);
 }
 
 void pcibios_fixup_bus(struct pci_bus *pbus)
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9ce5afe..6f7251f 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -639,10 +639,7 @@
 				       (unsigned long long)r->end,
 				       (unsigned int)r->flags);
 
-			if (pci_claim_resource(dev, i) == 0)
-				continue;
-
-			pci_claim_bridge_resource(dev, i);
+			pci_claim_resource(dev, i);
 		}
 	}
 
@@ -677,11 +674,10 @@
 	}
 
 	pci_of_scan_bus(pbm, node, bus);
-	pci_bus_add_devices(bus);
 	pci_bus_register_of_sysfs(bus);
 
 	pci_claim_bus_resources(bus);
-
+	pci_bus_add_devices(bus);
 	return bus;
 }
 
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 6cc78c2..24384e1 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -391,12 +391,16 @@
 	struct linux_pbm_info *pbm = &pcic->pbm;
 
 	pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, &pcic_ops, pbm);
+	if (!pbm->pci_bus)
+		return;
+
 #if 0 /* deadwood transplanted from sparc64 */
 	pci_fill_in_pbm_cookies(pbm->pci_bus, pbm, pbm->prom_node);
 	pci_record_assignments(pbm, pbm->pci_bus);
 	pci_assign_unassigned(pbm, pbm->pci_bus);
 	pci_fixup_irq(pbm, pbm->pci_bus);
 #endif
+	pci_bus_add_devices(pbm->pci_bus);
 }
 
 /*
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23..18147a5 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@
 	.rating	= 100,
 	.read	= timer_cs_read,
 	.mask	= CLOCKSOURCE_MASK(64),
-	.shift	= 2,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static __init int setup_timer_cs(void)
 {
 	timer_cs_enabled = 1;
-	timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-	                                    timer_cs.shift);
-
-	return clocksource_register(&timer_cs);
+	return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 325df47..9475a74 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -339,6 +339,8 @@
 			struct pci_bus *next_bus;
 			struct pci_dev *dev;
 
+			pci_bus_add_devices(root_bus);
+
 			list_for_each_entry(dev, &root_bus->devices, bus_list) {
 				/*
 				 * Find the PCI host controller, ie. the 1st
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 2c95f37..b1df847 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1030,6 +1030,8 @@
 alloc_mem_map_failed:
 			break;
 		}
+
+		pci_bus_add_devices(root_bus);
 	}
 
 	return 0;
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b08..00178ec 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@
 
 void update_vsyscall(struct timekeeper *tk)
 {
-	if (tk->tkr.clock != &cycle_counter_cs)
+	if (tk->tkr_mono.clock != &cycle_counter_cs)
 		return;
 
 	write_seqcount_begin(&vdso_data->tb_seq);
 
-	vdso_data->cycle_last		= tk->tkr.cycle_last;
-	vdso_data->mask			= tk->tkr.mask;
-	vdso_data->mult			= tk->tkr.mult;
-	vdso_data->shift		= tk->tkr.shift;
+	vdso_data->cycle_last		= tk->tkr_mono.cycle_last;
+	vdso_data->mask			= tk->tkr_mono.mask;
+	vdso_data->mult			= tk->tkr_mono.mult;
+	vdso_data->shift		= tk->tkr_mono.shift;
 
 	vdso_data->wall_time_sec	= tk->xtime_sec;
-	vdso_data->wall_time_snsec	= tk->tkr.xtime_nsec;
+	vdso_data->wall_time_snsec	= tk->tkr_mono.xtime_nsec;
 
 	vdso_data->monotonic_time_sec	= tk->xtime_sec
 					+ tk->wall_to_monotonic.tv_sec;
-	vdso_data->monotonic_time_snsec	= tk->tkr.xtime_nsec
+	vdso_data->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
 					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr.shift);
+						<< tk->tkr_mono.shift);
 	while (vdso_data->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
 		vdso_data->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr.shift;
+					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
 		vdso_data->monotonic_time_sec++;
 	}
 
 	vdso_data->wall_time_coarse_sec	= tk->xtime_sec;
-	vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
-						 tk->tkr.shift);
+	vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+						 tk->tkr_mono.shift);
 
 	vdso_data->monotonic_time_coarse_sec =
 		vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 374a055..d45fa5f 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -266,17 +266,10 @@
 	pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
 
 	if (!pci_has_flag(PCI_PROBE_ONLY)) {
-		/*
-		 * Size the bridge windows.
-		 */
 		pci_bus_size_bridges(puv3_bus);
-
-		/*
-		 * Assign resources.
-		 */
 		pci_bus_assign_resources(puv3_bus);
 	}
-
+	pci_bus_add_devices(puv3_bus);
 	return 0;
 }
 subsys_initcall(pci_common_init);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b7d31ca..faff693 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -235,12 +235,10 @@
 	def_bool y
 
 config ZONE_DMA32
-	bool
-	default X86_64
+	def_bool y if X86_64
 
 config AUDIT_ARCH
-	bool
-	default X86_64
+	def_bool y if X86_64
 
 config ARCH_SUPPORTS_OPTIMIZED_INLINING
 	def_bool y
@@ -891,7 +889,8 @@
        depends on !SMP && X86_LOCAL_APIC
 
 config X86_UP_APIC
-	bool "Local APIC support on uniprocessors"
+	bool "Local APIC support on uniprocessors" if !PCI_MSI
+	default PCI_MSI
 	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
@@ -903,10 +902,6 @@
 	  performance counters), and the NMI watchdog which detects hard
 	  lockups.
 
-config X86_UP_APIC_MSI
-	def_bool y
-	select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
-
 config X86_UP_IOAPIC
 	bool "IO-APIC support on uniprocessors"
 	depends on X86_UP_APIC
@@ -925,8 +920,8 @@
 	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
-	depends on X86_LOCAL_APIC
+	def_bool y
+	depends on X86_LOCAL_APIC || X86_UP_IOAPIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1145,10 +1140,10 @@
 	depends on MICROCODE
 
 config MICROCODE_INTEL_EARLY
-	def_bool n
+	bool
 
 config MICROCODE_AMD_EARLY
-	def_bool n
+	bool
 
 config MICROCODE_EARLY
 	bool "Early load microcode"
@@ -1300,14 +1295,14 @@
 	def_bool y
 	depends on X86_64 || HIGHMEM64G
 
-config DIRECT_GBPAGES
-	bool "Enable 1GB pages for kernel pagetables" if EXPERT
-	default y
-	depends on X86_64
+config X86_DIRECT_GBPAGES
+	def_bool y
+	depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
 	---help---
-	  Allow the kernel linear mapping to use 1GB pages on CPUs that
-	  support it. This can improve the kernel's performance a tiny bit by
-	  reducing TLB pressure. If in doubt, say "Y".
+	  Certain kernel features effectively disable kernel
+	  linear 1 GB mappings (even if the CPU otherwise
+	  supports them), so don't confuse the user by printing
+	  that we have them enabled.
 
 # Common NUMA Features
 config NUMA
@@ -1747,14 +1742,11 @@
 	depends on KEXEC_FILE
 	---help---
 	  This option makes kernel signature verification mandatory for
-	  kexec_file_load() syscall. If kernel is signature can not be
-	  verified, kexec_file_load() will fail.
+	  the kexec_file_load() syscall.
 
-	  This option enforces signature verification at generic level.
-	  One needs to enable signature verification for type of kernel
-	  image being loaded to make sure it works. For example, enable
-	  bzImage signature verification option to be able to load and
-	  verify signatures of bzImage. Otherwise kernel loading will fail.
+	  In addition to that option, you need to enable signature
+	  verification for the corresponding kernel image type being
+	  loaded in order for this to work.
 
 config KEXEC_BZIMAGE_VERIFY_SIG
 	bool "Enable bzImage signature verification support"
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb13763..d7b1f65 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -295,7 +295,8 @@
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -315,6 +316,8 @@
 	}
 #endif
 
+	boot_params->hdr.loadflags |= KASLR_FLAG;
+
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
 		       (unsigned long)output, output_size);
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1d7fbbc..8ef964d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -29,6 +29,7 @@
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 ENTRY(startup_32)
@@ -102,7 +103,7 @@
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb	$(1<<6), BP_loadflags(%esi)
+	testb	$KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz	1f
 
 	cli
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6b1766c..b0c0d16 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -31,6 +31,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 	.code32
@@ -46,7 +47,7 @@
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb $(1<<6), BP_loadflags(%esi)
+	testb $KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz 1f
 
 	cli
@@ -164,7 +165,7 @@
 	/* After gdt is loaded */
 	xorl	%eax, %eax
 	lldt	%ax
-	movl    $0x20, %eax
+	movl    $__BOOT_TSS, %eax
 	ltr	%ax
 
 	/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864..a107b93 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -377,6 +377,9 @@
 
 	real_mode = rmode;
 
+	/* Clear it for solely in-kernel use */
+	real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
 	sanitize_boot_params(real_mode);
 
 	if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,7 +404,7 @@
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len, output,
 					output_len > run_size ? output_len
 							      : run_size);
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d6..89dd0d7 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 493f3fd..318b846 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -30,7 +30,7 @@
 	int delta = 0;
 
 	while (*s1 || *s2) {
-		delta = *s2 - *s1;
+		delta = *s1 - *s2;
 		if (delta)
 			return delta;
 		s1++;
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index 748e8d0..aa8a96b 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -22,10 +22,8 @@
 /*
  * Common variables
  */
-int adapter;			/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
+int adapter;		/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
 int force_x, force_y;	/* Don't query the BIOS for cols/rows */
-
 int do_restore;		/* Screen contents changed during mode flip */
 int graphic_mode;	/* Graphic mode with linear frame buffer */
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 43eda28..05111bb 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -17,6 +17,8 @@
 #include "video.h"
 #include "vesa.h"
 
+static u16 video_segment;
+
 static void store_cursor_position(void)
 {
 	struct biosregs ireg, oreg;
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 0bb2549..b54e032 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -91,7 +91,6 @@
 #define ADAPTER_VGA	2
 
 extern int adapter;
-extern u16 video_segment;
 extern int force_x, force_y;	/* Don't query the BIOS for cols/rows */
 extern int do_restore;		/* Restore screen contents */
 extern int graphic_mode;	/* Graphics mode with linear frame buffer */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 419819d..aaa1118 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -248,7 +248,7 @@
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4c311dd..315b861 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -243,7 +243,7 @@
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 26d49eb..225be06 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -178,7 +178,7 @@
 	## 2a) PROCESS FULL BLOCKS:
 	################################################################
 full_block:
-	movq    $128,%rax
+	movl    $128,%eax
 	lea     128*8*2(block_0), block_1
 	lea     128*8*3(block_0), block_2
 	add     $128*8*1, block_0
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index a039d21..a350c99 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -264,7 +264,7 @@
 	movq	R1,	8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_enc_blk)
 
@@ -316,6 +316,6 @@
 	movq	R1,	8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e785b42..bb635c6 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index d0165c9..c81d35e6 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -161,8 +161,7 @@
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-				   struct sigcontext_ia32 __user *sc,
-				   unsigned int *pax)
+				   struct sigcontext_ia32 __user *sc)
 {
 	unsigned int tmpflags, err = 0;
 	void __user *buf;
@@ -184,7 +183,7 @@
 		RELOAD_SEG(es);
 
 		COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-		COPY(dx); COPY(cx); COPY(ip);
+		COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 		/* Don't touch extended registers */
 
 		COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@
 
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-
-		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
 	err |= restore_xstate_sig(buf, 1);
 
+	force_iret();
+
 	return err;
 }
 
@@ -211,7 +210,6 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
 	sigset_t set;
-	unsigned int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -224,9 +222,9 @@
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->sc))
 		goto badframe;
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
-	unsigned int ax;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -249,13 +246,13 @@
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit rt sigreturn");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebca..a821b1c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -30,24 +30,13 @@
 
 	.section .entry.text, "ax"
 
-	.macro IA32_ARG_FIXUP noebp=0
-	movl	%edi,%r8d
-	.if \noebp
-	.else
-	movl	%ebp,%r9d
-	.endif
-	xchg	%ecx,%esi
-	movl	%ebx,%edi
-	movl	%edx,%edx	/* zero extension */
-	.endm 
-
-	/* clobbers %eax */	
-	.macro  CLEAR_RREGS offset=0, _r9=rax
+	/* clobbers %rax */
+	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,\offset+R11(%rsp)
-	movq	%rax,\offset+R10(%rsp)
-	movq	%\_r9,\offset+R9(%rsp)
-	movq	%rax,\offset+R8(%rsp)
+	movq	%rax,R11(%rsp)
+	movq	%rax,R10(%rsp)
+	movq	%\_r9,R9(%rsp)
+	movq	%rax,R8(%rsp)
 	.endm
 
 	/*
@@ -60,14 +49,14 @@
 	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
 	 * an appropriately invalid value.
 	 */
-	.macro LOAD_ARGS32 offset, _r9=0
+	.macro LOAD_ARGS32 _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl RCX(%rsp),%ecx
+	movl RDX(%rsp),%edx
+	movl RSI(%rsp),%esi
+	movl RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -99,54 +88,69 @@
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6	
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */ 	
+ */
 ENTRY(ia32_sysenter_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
 	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rsp,rbp
-	SWAPGS_UNSAFE_STACK
-	movq	PER_CPU_VAR(kernel_stack), %rsp
-	addq	$(KERNEL_STACK_OFFSET),%rsp
+
 	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs, here we enable it straight after entry:
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
 	 */
+	SWAPGS_UNSAFE_STACK
+	movq	PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
 	ENABLE_INTERRUPTS(CLBR_NONE)
- 	movl	%ebp,%ebp		/* zero extension */
-	pushq_cfi $__USER32_DS
-	/*CFI_REL_OFFSET ss,0*/
-	pushq_cfi %rbp
-	CFI_REL_OFFSET rsp,0
-	pushfq_cfi
-	/*CFI_REL_OFFSET rflags,0*/
-	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
-	CFI_REGISTER rip,r10
-	pushq_cfi $__USER32_CS
-	/*CFI_REL_OFFSET cs,0*/
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%ebp, %ebp
 	movl	%eax, %eax
-	pushq_cfi %r10
-	CFI_REL_OFFSET rip,0
-	pushq_cfi %rax
+
+	movl	ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+	CFI_REGISTER rip,r10
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi	$__USER32_DS		/* pt_regs->ss */
+	pushq_cfi	%rbp			/* pt_regs->sp */
+	CFI_REL_OFFSET	rsp,0
+	pushfq_cfi				/* pt_regs->flags */
+	pushq_cfi	$__USER32_CS		/* pt_regs->cs */
+	pushq_cfi	%r10 /* pt_regs->ip = thread_info->sysenter_return */
+	CFI_REL_OFFSET	rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi_reg	rax			/* pt_regs->ax */
 	cld
-	SAVE_ARGS 0,1,0
- 	/* no need to do an access_ok check here because rbp has been
- 	   32bit zero extended */ 
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	/*
+	 * no need to do an access_ok check here because rbp has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -157,42 +161,80 @@
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+	testl $X86_EFLAGS_NT,EFLAGS(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	ia32_badsys
 sysenter_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	movl	%ebp,%r9d	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
+	movq	%rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
-	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
-	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	/*
+	 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+	 * NMI between STI and SYSEXIT has poorly specified behavior,
+	 * and and NMI followed by an IRQ with usergs is fatal.  So
+	 * we just pretend we're using SYSEXIT but we really use
+	 * SYSRETL instead.
+	 *
+	 * This code path is still called 'sysexit' because it pairs
+	 * with 'sysenter' and it uses the SYSENTER calling convention.
+	 */
+	andl    $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	movl	RIP(%rsp),%ecx		/* User %eip */
+	CFI_REGISTER rip,rcx
+	RESTORE_RSI_RDI
+	xorl	%edx,%edx		/* avoid info leaks */
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
-	xorq	%r11,%r11
-	popfq_cfi
+	movl	EFLAGS(%rsp),%r11d	/* User eflags */
 	/*CFI_RESTORE rflags*/
-	popq_cfi %rcx				/* User %esp */
-	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS_SYSEXIT32
+
+	/*
+	 * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
+	 * since it avoids a dicey window with interrupts enabled.
+	 */
+	movl	RSP(%rsp),%esp
+
+	/*
+	 * USERGS_SYSRET32 does:
+	 *  gsbase = user's gs base
+	 *  eip = ecx
+	 *  rflags = r11
+	 *  cs = __USER32_CS
+	 *  ss = __USER_DS
+	 *
+	 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+	 *
+	 *  pop %ebp
+	 *  pop %edx
+	 *  pop %ecx
+	 *
+	 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+	 * avoid info leaks.  R11 ends up with VDSO32_SYSENTER_RETURN's
+	 * address (already known to user code), and R12-R15 are
+	 * callee-saved and therefore don't contain any interesting
+	 * kernel data.
+	 */
+	USERGS_SYSRET32
 
 	CFI_RESTORE_STATE
 
@@ -205,18 +247,18 @@
 	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
 	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	movl RAX(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 	movl %ebx,%edi			/* reload 1st syscall arg */
-	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
-	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
-	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
-	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	movl RCX(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,13 +269,13 @@
 1:	setbe %al		/* 1 if error, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	call __audit_syscall_exit
-	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
+	movq RAX(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz \exit
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_with_check
 	.endm
 
@@ -253,16 +295,16 @@
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -272,94 +314,128 @@
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2	(note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
- * with the int 0x80 path.	
- */ 	
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
+
+	/*
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	PER_CPU_VAR(kernel_stack),%rsp
-	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
-	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
-	movl 	%eax,%eax	/* zero extension */
-	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	movq	%rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%eax,%eax
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi	$__USER32_DS		/* pt_regs->ss */
+	pushq_cfi	%r8			/* pt_regs->sp */
+	CFI_REL_OFFSET rsp,0
+	pushq_cfi	%r11			/* pt_regs->flags */
+	pushq_cfi	$__USER32_CS		/* pt_regs->cs */
+	pushq_cfi	%rcx			/* pt_regs->ip */
+	CFI_REL_OFFSET rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rbp			/* pt_regs->cx */
 	movl	%ebp,%ecx
-	movq	$__USER32_CS,CS-ARGOFFSET(%rsp)
-	movq	$__USER32_DS,SS-ARGOFFSET(%rsp)
-	movq	%r11,EFLAGS-ARGOFFSET(%rsp)
-	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-	movq	%r8,RSP-ARGOFFSET(%rsp)	
-	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-	/* no need to do an access_ok check here because r8 has been
-	   32bit zero extended */ 
-	/* hardware stack frame is complete now */	
+	pushq_cfi_reg	rax			/* pt_regs->ax */
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	/*
+	 * no need to do an access_ok check here because r8 has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
 	ja  ia32_badsys
 cstar_do_call:
-	IA32_ARG_FIXUP 1
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	/* r9 already loaded */	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
-	movl RIP-ARGOFFSET(%rsp),%ecx
+	andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	RESTORE_RSI_RDI_RDX
+	movl RIP(%rsp),%ecx
 	CFI_REGISTER rip,rcx
-	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
+	movl EFLAGS(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
 	xorq	%r10,%r10
 	xorq	%r9,%r9
 	xorq	%r8,%r8
 	TRACE_IRQS_ON
-	movl RSP-ARGOFFSET(%rsp),%esp
+	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
+	/*
+	 * 64bit->32bit SYSRET restores eip from ecx,
+	 * eflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * (Note: 32bit->32bit SYSRET is different: since r11
+	 * does not exist, it merely sets eflags.IF=1).
+	 */
 	USERGS_SYSRET32
-	
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
-	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	movl %r9d,R9(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
-	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	movl R9(%rsp),%r9d	/* reload 6th syscall arg */
 	jmp cstar_dispatch
 
 sysretl_audit:
@@ -368,17 +444,17 @@
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
-	CLEAR_RREGS 0, r9
+	SAVE_EXTRA_REGS
+	CLEAR_RREGS r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32 1	/* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -391,78 +467,94 @@
 	jmp ia32_sysret
 	CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:	 
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6	(note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.	
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.	
- */ 				
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-RIP
-	/*CFI_REL_OFFSET	ss,SS-RIP*/
-	CFI_REL_OFFSET	rsp,RSP-RIP
-	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
-	/*CFI_REL_OFFSET	cs,CS-RIP*/
-	CFI_REL_OFFSET	rip,RIP-RIP
+	CFI_DEF_CFA	rsp,5*8
+	/*CFI_REL_OFFSET	ss,4*8 */
+	CFI_REL_OFFSET	rsp,3*8
+	/*CFI_REL_OFFSET	rflags,2*8 */
+	/*CFI_REL_OFFSET	cs,1*8 */
+	CFI_REL_OFFSET	rip,0*8
+
+	/*
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
-	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
-	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	movl %eax,%eax
-	pushq_cfi %rax
+
+	/* Zero-extending 32-bit regs, do not remove */
+	movl	%eax,%eax
+
+	/* Construct struct pt_regs on stack (iret frame is already on stack) */
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi_reg	rax			/* pt_regs->ax */
 	cld
-	/* note the registers are not zero extended to the sf.
-	   this could be a problem. */
-	SAVE_ARGS 0,1,0
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	sub	$(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 10*8
+
+	orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+	testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 ia32_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl %edi,%r8d	/* arg5 */
+	movl %ebp,%r9d	/* arg6 */
+	xchg %ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl %ebx,%edi	/* arg1 */
+	movl %edx,%edx	/* arg3 (zero extension) */
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	CLEAR_RREGS
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	LOAD_ARGS32	/* reload args from stack in case ptrace changed it */
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
 END(ia32_syscall)
 
 ia32_badsys:
-	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+	movq $0,ORIG_RAX(%rsp)
 	movq $-ENOSYS,%rax
 	jmp ia32_sysret
 
@@ -479,8 +571,6 @@
 
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn
-	PTREGSCALL stub32_execve, compat_sys_execve
-	PTREGSCALL stub32_execveat, compat_sys_execveat
 	PTREGSCALL stub32_fork, sys_fork
 	PTREGSCALL stub32_vfork, sys_vfork
 
@@ -492,24 +582,23 @@
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-/*	CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-/*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-/*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	CFI_DEF_CFA	rsp,SIZEOF_PTREGS
+	CFI_REL_OFFSET	rax,RAX
+	CFI_REL_OFFSET	rcx,RCX
+	CFI_REL_OFFSET	rdx,RDX
+	CFI_REL_OFFSET	rsi,RSI
+	CFI_REL_OFFSET	rdi,RDI
+	CFI_REL_OFFSET	rip,RIP
+/*	CFI_REL_OFFSET	cs,CS*/
+/*	CFI_REL_OFFSET	rflags,EFLAGS*/
+	CFI_REL_OFFSET	rsp,RSP
+/*	CFI_REL_OFFSET	ss,SS*/
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
deleted file mode 100644
index 51ecd5b..0000000
--- a/arch/x86/ia32/nosyscall.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
-	return -ENOSYS;
-}
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 8e0ceec..719cd70 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -201,20 +201,6 @@
 				advice);
 }
 
-long sys32_vm86_warning(void)
-{
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		compat_printk(KERN_INFO
-			      "%s: vm86 mode not supported on 64 bit kernel\n",
-			      me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	}
-	return -ENOSYS;
-}
-
 asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
 				   size_t count)
 {
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
deleted file mode 100644
index 4754ba0..0000000
--- a/arch/x86/ia32/syscall_ia32.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c..bdf02ee 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
 	.endm
 #endif
 
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
 	.long \orig - .
 	.long \alt - .
 	.word \feature
 	.byte \orig_len
 	.byte \alt_len
+	.byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+	\oldinstr
+141:
+	.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr
+144:
+	.popsection
+.endm
+
+#define old_len			141b-140b
+#define new_len1		144f-143f
+#define new_len2		145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+	\oldinstr
+141:
+	.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr1
+144:
+	\newinstr2
+145:
+	.popsection
 .endm
 
 #endif  /*  __ASSEMBLY__  */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbe..ba32af0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@
 	s32 repl_offset;	/* offset to replacement instruction */
 	u16 cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen;	/* length of new instruction, <= instrlen */
-};
+	u8  replacementlen;	/* length of new instruction */
+	u8  padlen;		/* length of build-time padding */
+} __packed;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@
 }
 #endif	/* CONFIG_SMP */
 
-#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num)	"664"#num
+#define e_replacement(num)	"665"#num
 
-#define b_replacement(number)	"663"#number
-#define e_replacement(number)	"664"#number
+#define alt_end_marker		"663"
+#define alt_slen		"662b-661b"
+#define alt_pad_len		alt_end_marker"b-662b"
+#define alt_total_slen		alt_end_marker"b-661b"
+#define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
 
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num)					\
+	"661:\n\t" oldinstr "\n662:\n"					\
+	".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * "		\
+		"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
 
-#define ALTINSTR_ENTRY(feature, number)					      \
+#define OLDINSTR(oldinstr, num)						\
+	__OLDINSTR(oldinstr, num)					\
+	alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+	"661:\n\t" oldinstr "\n662:\n"								\
+	".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "	\
+		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n"	\
+	alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)					      \
 	" .long 661b - .\n"				/* label           */ \
-	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
+	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
 	" .word " __stringify(feature) "\n"		/* feature bit     */ \
-	" .byte " alt_slen "\n"				/* source len      */ \
-	" .byte " alt_rlen(number) "\n"			/* replacement len */
+	" .byte " alt_total_slen "\n"			/* source len      */ \
+	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
+	" .byte " alt_pad_len "\n"			/* pad len */
 
-#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
-	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
-
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
-	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */     \
+	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR(oldinstr, 1)						\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature, 1)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
 	".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR_2(oldinstr, 1, 2)					\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature1, 1)					\
 	ALTINSTR_ENTRY(feature2, 2)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	DISCARD_ENTRY(2)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
@@ -146,6 +166,9 @@
 #define alternative(oldinstr, newinstr, feature)			\
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22..976b86a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@
 {
 	volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-	alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+	alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
 		       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
 		       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
@@ -204,7 +204,6 @@
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb3..959e45b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
- *
- * (Could use an alternative three way for this if there was one.)
  */
 static __always_inline void rdtsc_barrier(void)
 {
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+			  "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b..1c8b50e 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
 
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
+#define SIZEOF_PTREGS	21*8
 
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
-
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
-	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
-	.endif
-
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+	.if \r11
+	movq_cfi r11, 6*8+\offset
+	.endif
+	.if \r8910
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
+	.endif
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
+	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+	SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
+	.endm
+
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
+	.endm
+
+	.macro ZERO_EXTRA_REGS
+	xorl	%r15d, %r15d
+	xorl	%r14d, %r14d
+	xorl	%r13d, %r13d
+	xorl	%r12d, %r12d
+	xorl	%ebp, %ebp
+	xorl	%ebx, %ebx
+	.endm
+
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
+	movq_cfi_restore 12*8, rdx
 	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
-	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
+	.endm
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
+	.endm
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
+	.endm
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
+	.endm
+	.macro RESTORE_C_REGS_EXCEPT_R11
+	RESTORE_C_REGS_HELPER 1,1,0,1,1
+	.endm
+	.macro RESTORE_C_REGS_EXCEPT_RCX_R11
+	RESTORE_C_REGS_HELPER 1,0,0,1,1
+	.endm
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
-	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
-	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
-	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
-	.endm
-
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
@@ -210,37 +224,23 @@
  */
 
 	.macro SAVE_ALL
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	.endm
 
 	.macro RESTORE_ALL
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg ebx
+	popl_cfi_reg ecx
+	popl_cfi_reg edx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebp
+	popl_cfi_reg eax
 	.endm
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c40..acdee09 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = this_cpu_read(old_rsp) - 128;
+		sp = task_pt_regs(current)->sp - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a5485..854c04b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -231,7 +231,9 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
@@ -418,6 +420,7 @@
 			 " .word %P0\n"		/* 1: do replace */
 			 " .byte 2b - 1b\n"	/* source len */
 			 " .byte 0\n"		/* replacement len */
+			 " .byte 0\n"		/* pad len */
 			 ".previous\n"
 			 /* skipping size check since replacement size = 0 */
 			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +435,7 @@
 			 " .word %P0\n"		/* feature bit */
 			 " .byte 2b - 1b\n"	/* source len */
 			 " .byte 0\n"		/* replacement len */
+			 " .byte 0\n"		/* pad len */
 			 ".previous\n"
 			 /* skipping size check since replacement size = 0 */
 			 : : "i" (bit) : : t_no);
@@ -457,6 +461,7 @@
 			     " .word %P1\n"		/* feature bit */
 			     " .byte 2b - 1b\n"		/* source len */
 			     " .byte 4f - 3f\n"		/* replacement len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +488,30 @@
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
-		asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
 			 "2:\n"
+			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			         "((5f-4f) - (2b-1b)),0x90\n"
+			 "3:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
-			 " .long 3f - .\n"		/* repl offset */
+			 " .long 4f - .\n"		/* repl offset */
 			 " .word %P1\n"			/* always replace */
-			 " .byte 2b - 1b\n"		/* src len */
-			 " .byte 4f - 3f\n"		/* repl len */
+			 " .byte 3b - 1b\n"		/* src len */
+			 " .byte 5f - 4f\n"		/* repl len */
+			 " .byte 3b - 2b\n"		/* pad len */
 			 ".previous\n"
 			 ".section .altinstr_replacement,\"ax\"\n"
-			 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
-			 "4:\n"
+			 "4: jmp %l[t_no]\n"
+			 "5:\n"
 			 ".previous\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"		/* src offset */
 			 " .long 0\n"			/* no replacement */
 			 " .word %P0\n"			/* feature bit */
-			 " .byte 2b - 1b\n"		/* src len */
+			 " .byte 3b - 1b\n"		/* src len */
 			 " .byte 0\n"			/* repl len */
+			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
 			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
 			 : : t_dynamic, t_no);
@@ -527,6 +531,7 @@
 			     " .word %P2\n"		/* always replace */
 			     " .byte 2b - 1b\n"		/* source len */
 			     " .byte 4f - 3f\n"		/* replacement len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +546,7 @@
 			     " .word %P1\n"		/* feature bit */
 			     " .byte 4b - 3b\n"		/* src len */
 			     " .byte 6f - 5f\n"		/* repl len */
+			     " .byte 0\n"		/* pad len */
 			     ".previous\n"
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e..a0bf89f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-#define set_intr_gate(n, addr)						\
+#define set_intr_gate_notrace(n, addr)					\
 	do {								\
 		BUG_ON((unsigned)n > 0xFF);				\
 		_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,	\
 			  __KERNEL_CS);					\
+	} while (0)
+
+#define set_intr_gate(n, addr)						\
+	do {								\
+		set_intr_gate_notrace(n, addr);				\
 		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
 				0, 0, __KERNEL_CS);			\
 	} while (0)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f1598..de1cdaf 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
+	.macro pushq_cfi_reg reg
+	pushq %\reg
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro popq_cfi_reg reg
+	popq %\reg
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfq_cfi
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	.endm
 
+	.macro pushl_cfi_reg reg
+	pushl %\reg
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popl_cfi reg
 	popl \reg
 	CFI_ADJUST_CFA_OFFSET -4
 	.endm
 
+	.macro popl_cfi_reg reg
+	popl %\reg
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfl_cfi
 	pushfl
 	CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 25bce45..3738b13 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -2,6 +2,8 @@
 #define _ASM_X86_EFI_H
 
 #include <asm/i387.h>
+#include <asm/pgtable.h>
+
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
  * with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a..935588d9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
 {
-	regs->ax = regs->bx = regs->cx = regs->dx = 0;
-	regs->si = regs->di = regs->bp = 0;
+	/* Commented-out registers are cleared in stub_execve */
+	/*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+	regs->si = regs->di /*= regs->bp*/ = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
-	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+	/*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
 	t->fs = t->gs = 0;
 	t->fsindex = t->gsindex = 0;
 	t->ds = t->es = ds;
@@ -365,6 +366,7 @@
 struct va_alignment {
 	int flags;
 	unsigned long mask;
+	unsigned long bits;
 } ____cacheline_aligned;
 
 extern struct va_alignment va_align;
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a..da5e967 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+	per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+	tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+	return new == this_cpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
 	return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-	memset(fx, 0, xstate_size);
 	fx->cwd = 0x37f;
 	fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -351,17 +378,6 @@
 	__thread_set_has_fpu(tsk);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
-{
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
-	}
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
 	/*
@@ -369,21 +385,37 @@
 	 */
 	preempt_disable();
 	tsk->thread.fpu_counter = 0;
-	__drop_fpu(tsk);
+
+	if (__thread_has_fpu(tsk)) {
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		__thread_fpu_end(tsk);
+	}
+
 	clear_stopped_child_used_math(tsk);
 	preempt_enable();
 }
 
-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+	if (use_xsave())
+		xrstor_state(init_xstate_buf, -1);
+	else
+		fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
 {
 	if (!use_eager_fpu())
 		drop_fpu(tsk);
-	else {
-		if (use_xsave())
-			xrstor_state(init_xstate_buf, -1);
-		else
-			fxrstor_checking(&init_xstate_buf->i387);
-	}
+	else
+		restore_init_xstate();
 }
 
 /*
@@ -400,24 +432,6 @@
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-	per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-	return new == this_cpu_read_stable(fpu_owner_task) &&
-		cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
 	fpu_switch_t fpu;
@@ -426,13 +440,17 @@
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-					     new->thread.fpu_counter > 5);
+	fpu.preload = tsk_used_math(new) &&
+		      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
-			cpu = ~0;
-		old->thread.fpu.last_cpu = cpu;
-		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+			task_disable_lazy_fpu_restore(old);
+		else
+			old->thread.fpu.last_cpu = cpu;
+
+		/* But leave fpu_owner_task! */
+		old->thread.fpu.has_fpu = 0;
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
@@ -443,10 +461,10 @@
 			stts();
 	} else {
 		old->thread.fpu_counter = 0;
-		old->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(old);
 		if (fpu.preload) {
 			new->thread.fpu_counter++;
-			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+			if (fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			drop_init_fpu(new);
+			fpu_reset_state(new);
 	}
 }
 
@@ -495,10 +513,12 @@
 }
 
 /*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
  *
  * NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
  */
 static inline void user_fpu_begin(void)
 {
@@ -520,24 +540,6 @@
 }
 
 /*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-	if (use_eager_fpu()) {
-		__save_fpu(tsk);
-		return;
-	}
-
-	preempt_disable();
-	__save_init_fpu(tsk);
-	__thread_fpu_end(tsk);
-	preempt_enable();
-}
-
-/*
  * i387 state interaction
  */
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290..e9571dd 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-				    - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED	(-1)
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1..e7814b7 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@
 	const insn_byte_t *next_byte;
 };
 
-#define MAX_INSN_SIZE	16
+#define MAX_INSN_SIZE	15
 
 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
 #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f42a047..e37d6b3 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -79,11 +79,12 @@
  *  d). Similar to the 'init', except that this gets called from pci_iommu_init
  *      where we do have a memory allocator.
  *
- * The standard vs the _FINISH differs in that the _FINISH variant will
- * continue detecting other IOMMUs in the call list after the
- * the detection routine returns a positive number. The _FINISH will
- * stop the execution chain. Both will still call the 'init' and
- * 'late_init' functions if they are set.
+ * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
+ * in that the former will continue detecting other IOMMUs in the call
+ * list after the detection routine returns a positive number, while the
+ * latter will stop the execution chain upon first successful detection.
+ * Both variants will still call the 'init' and 'late_init' functions if
+ * they are set.
  */
 #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init)		\
 	__IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519..b77f5ed 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@
 #define USERGS_SYSRET32				\
 	swapgs;					\
 	sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32		\
-	swapgs;					\
-	sti;					\
-	sysexit
 
 #else
 #define INTERRUPT_RETURN		iret
@@ -163,33 +159,9 @@
 
 	return arch_irqs_disabled_flags(flags);
 }
+#endif /* !__ASSEMBLY__ */
 
-#else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
-	TRACE_IRQS_ON; \
-	sti; \
-	SAVE_REST; \
-	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
-	cli; \
-	TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT			\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call lockdep_sys_exit;			\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
+#ifdef __ASSEMBLY__
 #ifdef CONFIG_TRACE_IRQFLAGS
 #  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
 #  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
@@ -198,12 +170,29 @@
 #  define TRACE_IRQS_OFF
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  define LOCKDEP_SYS_EXIT	ARCH_LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ	ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
+#  ifdef CONFIG_X86_64
+#    define LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
+#    define LOCKDEP_SYS_EXIT_IRQ \
+	TRACE_IRQS_ON; \
+	sti; \
+	call lockdep_sys_exit_thunk; \
+	cli; \
+	TRACE_IRQS_OFF;
+#  else
+#    define LOCKDEP_SYS_EXIT \
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call lockdep_sys_exit;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+#    define LOCKDEP_SYS_EXIT_IRQ
+#  endif
+#else
 #  define LOCKDEP_SYS_EXIT
 #  define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
 #endif /* __ASSEMBLY__ */
+
 #endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb..a4c1cf7 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
 #include <linux/types.h>
@@ -30,8 +30,6 @@
 	return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_X86_64
 typedef u64 jump_label_t;
 #else
@@ -44,4 +42,5 @@
 	jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39..dea2e7e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,11 +81,6 @@
 		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@
 enum {
 	KVM_DEBUGREG_BP_ENABLED = 1,
 	KVM_DEBUGREG_WONT_EXIT = 2,
+	KVM_DEBUGREG_RELOAD = 4,
 };
 
 struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@
 
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	int maxphyaddr;
+
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+/*
+ * We use as the mode the number of bits allocated in the LDR for the
+ * logical processor ID.  It happens that these are all powers of two.
+ * This makes it is very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER          4
+#define KVM_APIC_MODE_XAPIC_FLAT             8
+#define KVM_APIC_MODE_X2APIC                16
+
 struct kvm_apic_map {
 	struct rcu_head rcu;
-	u8 ldr_bits;
-	/* fields bellow are used to decode ldr values in different modes */
-	u32 cid_shift, cid_mask, lid_mask, broadcast;
+	u8 mode;
 	struct kvm_lapic *phys_map[256];
 	/* first index is cluster id second is cpu id in a cluster */
 	struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+					struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e62cf89..c1adf33 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -115,7 +115,7 @@
 
 static inline bool kvm_para_available(void)
 {
-	return 0;
+	return false;
 }
 
 static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9b3de99..1f5a86d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,6 +116,12 @@
 	u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+	__u64		overflow_recov	: 1, /* cpuid_ebx(80000007) */
+			__reserved_0	: 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -128,9 +134,11 @@
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
-	MCP_UC = (1 << 1),		/* log uncorrected errors */
-	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
+	MCP_TIMESTAMP	= BIT(0),	/* log time stamp */
+	MCP_UC		= BIT(1),	/* log uncorrected errors */
+	MCP_DONTLOG	= BIT(2),	/* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
 
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 201b520..2fb20d6 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -75,6 +75,79 @@
 
 #ifdef CONFIG_MICROCODE_EARLY
 #define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_vendor() to get vendor id for AP.
+ *
+ * x86_vendor() gets vendor information directly from CPUID.
+ */
+static inline int x86_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+static inline unsigned int __x86_family(unsigned int sig)
+{
+	unsigned int x86;
+
+	x86 = (sig >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (sig >> 20) & 0xff;
+
+	return x86;
+}
+
+static inline unsigned int x86_family(void)
+{
+	u32 eax = 0x00000001;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	return __x86_family(eax);
+}
+
+static inline unsigned int x86_model(unsigned int sig)
+{
+	unsigned int x86, model;
+
+	x86 = __x86_family(sig);
+
+	model = (sig >> 4) & 0xf;
+
+	if (x86 == 0x6 || x86 == 0xf)
+		model += ((sig >> 16) & 0xf) << 4;
+
+	return model;
+}
+
 extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 extern int __init save_microcode_in_initrd(void);
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index dd4c200..2b9209c 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -56,12 +56,15 @@
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-extern int
-get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc);
 extern int microcode_sanity_check(void *mc, int print_err);
-extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
-extern int
-update_match_revision(struct microcode_header_intel *mc_header, int rev);
+extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc);
+
+static inline int
+revision_is_newer(struct microcode_header_intel *mc_header, int rev)
+{
+	return (mc_header->rev <= rev) ? 0 : 1;
+}
 
 #ifdef CONFIG_MICROCODE_INTEL_EARLY
 extern void __init load_ucode_intel_bsp(void);
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db..653dfa7 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@
 		     :: "a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	trace_hardirqs_on();
+	/* "mwait %eax, %ecx;" */
+	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+}
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d..5f6051d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -976,11 +976,6 @@
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
-		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c935..d2203b5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -210,8 +210,23 @@
 	unsigned long		sp0;
 	unsigned short		ss0, __ss0h;
 	unsigned long		sp1;
-	/* ss1 caches MSR_IA32_SYSENTER_CS: */
-	unsigned short		ss1, __ss1h;
+
+	/*
+	 * We don't use ring 1, so ss1 is a convenient scratch space in
+	 * the same cacheline as sp0.  We use ss1 to cache the value in
+	 * MSR_IA32_SYSENTER_CS.  When we context switch
+	 * MSR_IA32_SYSENTER_CS, we first check if the new value being
+	 * written matches ss1, and, if it's not, then we wrmsr the new
+	 * value and update ss1.
+	 *
+	 * The only reason we context switch MSR_IA32_SYSENTER_CS is
+	 * that we set it to zero in vm86 tasks to avoid corrupting the
+	 * stack if we were to go through the sysenter path from vm86
+	 * mode.
+	 */
+	unsigned short		ss1;	/* MSR_IA32_SYSENTER_CS */
+
+	unsigned short		__ss1h;
 	unsigned long		sp2;
 	unsigned short		ss2, __ss2h;
 	unsigned long		__cr3;
@@ -276,13 +291,17 @@
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
 
 	/*
-	 * .. and then another 0x100 bytes for the emergency kernel stack:
+	 * Space for the temporary SYSENTER stack:
 	 */
-	unsigned long		stack[64];
+	unsigned long		SYSENTER_stack[64];
 
 } ____cacheline_aligned;
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
 
 /*
  * Save the original ist values for checking stack pointers during debugging
@@ -474,7 +493,6 @@
 #ifdef CONFIG_X86_32
 	unsigned long		sysenter_cs;
 #else
-	unsigned long		usersp;	/* Copy from PDA */
 	unsigned short		es;
 	unsigned short		ds;
 	unsigned short		fsindex;
@@ -564,6 +582,16 @@
 #endif
 }
 
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+	/* sp0 on x86_32 is special in and around vm86 mode. */
+	return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -761,10 +789,10 @@
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
-# define BASE_PREFETCH		ASM_NOP4
+# define BASE_PREFETCH		""
 # define ARCH_HAS_PREFETCH
 #else
-# define BASE_PREFETCH		"prefetcht0 (%1)"
+# define BASE_PREFETCH		"prefetcht0 %P1"
 #endif
 
 /*
@@ -775,10 +803,9 @@
  */
 static inline void prefetch(const void *x)
 {
-	alternative_input(BASE_PREFETCH,
-			  "prefetchnta (%1)",
+	alternative_input(BASE_PREFETCH, "prefetchnta %P1",
 			  X86_FEATURE_XMM,
-			  "r" (x));
+			  "m" (*(const char *)x));
 }
 
 /*
@@ -788,10 +815,9 @@
  */
 static inline void prefetchw(const void *x)
 {
-	alternative_input(BASE_PREFETCH,
-			  "prefetchw (%1)",
-			  X86_FEATURE_3DNOW,
-			  "r" (x));
+	alternative_input(BASE_PREFETCH, "prefetchw %P1",
+			  X86_FEATURE_3DNOWPREFETCH,
+			  "m" (*(const char *)x));
 }
 
 static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +825,9 @@
 	prefetchw(x);
 }
 
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+			   TOP_OF_KERNEL_STACK_PADDING)
+
 #ifdef CONFIG_X86_32
 /*
  * User space process size: 3GB (default).
@@ -809,39 +838,16 @@
 #define STACK_TOP_MAX		STACK_TOP
 
 #define INIT_THREAD  {							  \
-	.sp0			= sizeof(init_stack) + (long)&init_stack, \
+	.sp0			= TOP_OF_INIT_STACK,			  \
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
 }
 
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS  {							  \
-	.x86_tss = {							  \
-		.sp0		= sizeof(init_stack) + (long)&init_stack, \
-		.ss0		= __KERNEL_DS,				  \
-		.ss1		= __KERNEL_CS,				  \
-		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		  \
-	 },								  \
-	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },	  \
-}
-
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
-#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
-
 /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
  * is accessible even if the CPU haven't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
@@ -850,11 +856,11 @@
  * "struct pt_regs" is possible, but they may contain the
  * completely wrong values.
  */
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
-       __regs__ - 1;                                                   \
+#define task_pt_regs(task) \
+({									\
+	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
+	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
+	((struct pt_regs *)__ptr) - 1;					\
 })
 
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
@@ -886,11 +892,7 @@
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS  { \
-	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.sp0 = TOP_OF_INIT_STACK \
 }
 
 /*
@@ -902,11 +904,6 @@
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb..19507ff 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -47,9 +51,12 @@
 	unsigned long dx;
 	unsigned long si;
 	unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long ip;
 	unsigned long cs;
 	unsigned long flags;
@@ -89,11 +96,13 @@
 }
 
 /*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value.  This tricky test checks that with
- * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value.  This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
  */
 static inline int user_mode(struct pt_regs *regs)
 {
@@ -104,16 +113,6 @@
 #endif
 }
 
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
-	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
-		USER_RPL;
-#else
-	return user_mode(regs);
-#endif
-}
-
 static inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@
 #endif
 }
 
-#define current_user_stack_pointer()	this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer()	\
-	(test_thread_flag(TIF_IA32) 	\
-	 ? current_pt_regs()->sp 	\
-	 : this_cpu_read(old_rsp))
+#define current_user_stack_pointer()	current_pt_regs()->sp
+#define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
 
 #ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@
  */
 #define arch_ptrace_stop_needed(code, info)				\
 ({									\
-	set_thread_flag(TIF_NOTIFY_RESUME);				\
+	force_iret();							\
 	false;								\
 })
 
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e..25b1cc0 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@
 
 struct pvclock_vsyscall_time_info {
 	struct pvclock_vcpu_time_info pvti;
+	u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a5..5a9856e 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
 
 #include <linux/const.h>
 
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
 #define GDT_ENTRY(flags, base, limit)			\
 	((((base)  & _AC(0xff000000,ULL)) << (56-24)) |	\
 	 (((flags) & _AC(0x0000f0ff,ULL)) << 40) |	\
@@ -12,198 +14,228 @@
 	 (((base)  & _AC(0x00ffffff,ULL)) << 16) |	\
 	 (((limit) & _AC(0x0000ffff,ULL))))
 
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
 
 #define GDT_ENTRY_BOOT_CS	2
-#define __BOOT_CS		(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS	3
+#define GDT_ENTRY_BOOT_TSS	4
+#define __BOOT_CS		(GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS		(GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS*8)
 
-#define GDT_ENTRY_BOOT_DS	(GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS		(GDT_ENTRY_BOOT_DS * 8)
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK	0x3
 
-#define GDT_ENTRY_BOOT_TSS	(GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS		(GDT_ENTRY_BOOT_TSS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL		0x3
 
-#define SEGMENT_RPL_MASK	0x3 /*
-				     * Bottom two bits of selector give the ring
-				     * privilege level
-				     */
-#define SEGMENT_TI_MASK		0x4 /* Bit 2 is table indicator (LDT/GDT) */
-#define USER_RPL		0x3 /* User mode is privilege level 3 */
-#define SEGMENT_LDT		0x4 /* LDT segment has TI set... */
-#define SEGMENT_GDT		0x0 /* ... GDT has it cleared */
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK		0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT		0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT		0x0
+
+#define GDT_ENTRY_INVALID_SEG	0
 
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
  *
- *   0 - null
+ *   0 - null								<=== cacheline #1
  *   1 - reserved
  *   2 - reserved
  *   3 - reserved
  *
- *   4 - unused			<==== new cacheline
+ *   4 - unused								<=== cacheline #2
  *   5 - unused
  *
  *  ------- start of TLS (Thread-Local Storage) segments:
  *
  *   6 - TLS segment #1			[ glibc's TLS segment ]
  *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
- *   8 - TLS segment #3
+ *   8 - TLS segment #3							<=== cacheline #3
  *   9 - reserved
  *  10 - reserved
  *  11 - reserved
  *
  *  ------- start of kernel segments:
  *
- *  12 - kernel code segment		<==== new cacheline
+ *  12 - kernel code segment						<=== cacheline #4
  *  13 - kernel data segment
  *  14 - default user CS
  *  15 - default user DS
- *  16 - TSS
+ *  16 - TSS								<=== cacheline #5
  *  17 - LDT
  *  18 - PNPBIOS support (16->32 gate)
  *  19 - PNPBIOS support
- *  20 - PNPBIOS support
+ *  20 - PNPBIOS support						<=== cacheline #6
  *  21 - PNPBIOS support
  *  22 - PNPBIOS support
  *  23 - APM BIOS support
- *  24 - APM BIOS support
+ *  24 - APM BIOS support						<=== cacheline #7
  *  25 - APM BIOS support
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu			[ offset to per-cpu data area ]
- *  28 - stack_canary-20		[ for stack protector ]
+ *  28 - stack_canary-20		[ for stack protector ]		<=== cacheline #8
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
  */
-#define GDT_ENTRY_TLS_MIN	6
-#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN		6
+#define GDT_ENTRY_TLS_MAX 		(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
+#define GDT_ENTRY_KERNEL_CS		12
+#define GDT_ENTRY_KERNEL_DS		13
 #define GDT_ENTRY_DEFAULT_USER_CS	14
-
 #define GDT_ENTRY_DEFAULT_USER_DS	15
+#define GDT_ENTRY_TSS			16
+#define GDT_ENTRY_LDT			17
+#define GDT_ENTRY_PNPBIOS_CS32		18
+#define GDT_ENTRY_PNPBIOS_CS16		19
+#define GDT_ENTRY_PNPBIOS_DS		20
+#define GDT_ENTRY_PNPBIOS_TS1		21
+#define GDT_ENTRY_PNPBIOS_TS2		22
+#define GDT_ENTRY_APMBIOS_BASE		23
 
-#define GDT_ENTRY_KERNEL_BASE		(12)
-
-#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE+0)
-
-#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE+1)
-
-#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE+5)
-
-#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE+11)
-
-#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS			(GDT_ENTRY_ESPFIX_SS*8)
-
-#define GDT_ENTRY_PERCPU		(GDT_ENTRY_KERNEL_BASE+15)
-#ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
-#else
-#define __KERNEL_PERCPU 0
-#endif
-
-#define GDT_ENTRY_STACK_CANARY		(GDT_ENTRY_KERNEL_BASE+16)
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY*8)
-#else
-#define __KERNEL_STACK_CANARY		0
-#endif
+#define GDT_ENTRY_ESPFIX_SS		26
+#define GDT_ENTRY_PERCPU		27
+#define GDT_ENTRY_STACK_CANARY		28
 
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*
- * The GDT has 32 entries
+ * Number of entries in the GDT table:
  */
-#define GDT_ENTRIES 32
-
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32		(GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16		(GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS		(GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1		(GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2		(GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)	/* segment for calling fn */
-#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)	/* code segment for BIOS */
-#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)	/* data segment for BIOS */
-#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8)	/* transfer data segment */
-#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8)	/* another data segment */
-
+#define GDT_ENTRIES			32
 
 /*
- * Matching rules for certain types of segments.
+ * Segment selector values corresponding to the above entries:
  */
 
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS			(GDT_ENTRY_ESPFIX_SS*8)
 
+/* segment for calling fn: */
+#define PNP_CS32			(GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16			(GDT_ENTRY_PNPBIOS_CS16*8)
 
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x)		(((x) & 0xf4) == PNP_CS32)
+
+/* data segment for BIOS: */
+#define PNP_DS				(GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1				(GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2				(GDT_ENTRY_PNPBIOS_TS2*8)
+
+#ifdef CONFIG_SMP
+# define __KERNEL_PERCPU		(GDT_ENTRY_PERCPU*8)
 #else
+# define __KERNEL_PERCPU		0
+#endif
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+# define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY*8)
+#else
+# define __KERNEL_STACK_CANARY		0
+#endif
+
+#else /* 64-bit: */
+
 #include <asm/cache.h>
 
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
-
-#define __KERNEL32_CS   (GDT_ENTRY_KERNEL32_CS * 8)
+#define GDT_ENTRY_KERNEL32_CS		1
+#define GDT_ENTRY_KERNEL_CS		2
+#define GDT_ENTRY_KERNEL_DS		3
 
 /*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ *   if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ *   if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
  */
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS	__USER_DS
+#define GDT_ENTRY_DEFAULT_USER32_CS	4
+#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define GDT_ENTRY_DEFAULT_USER_CS	6
 
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
+/* Needs two entries */
+#define GDT_ENTRY_TSS			8
+/* Needs two entries */
+#define GDT_ENTRY_LDT			10
 
-#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRY_TLS_MIN		12
+#define GDT_ENTRY_TLS_MAX		14
 
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU		15
 
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES			16
 
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS			(GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS			(GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS			(GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS			(GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS			(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS			__USER_DS
+#define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG			(GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS				0
+#define GS_TLS				1
+
+#define GS_TLS_SEL			((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL			((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #endif
 
-#define __KERNEL_CS	(GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS	(GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS	(GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS	(GDT_ENTRY_DEFAULT_USER_CS*8+3)
 #ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl()  0
+# define get_kernel_rpl()		0
 #endif
 
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK  0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define IDT_ENTRIES			256
+#define NUM_EXCEPTION_VECTORS		32
+
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK		0x00027d00
+
+#define GDT_SIZE			(GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES		3
+#define TLS_SIZE			(GDT_ENTRY_TLS_ENTRIES* 8)
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
+
 extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
 #ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
 #endif
 
 /*
@@ -228,37 +260,30 @@
 } while (0)
 
 /*
- * Save a segment register away
+ * Save a segment register away:
  */
 #define savesegment(seg, value)				\
 	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 /*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
  */
 #ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk)	((tsk)->thread.gs)
-#define lazy_save_gs(v)		savesegment(gs, (v))
-#define lazy_load_gs(v)		loadsegment(gs, (v))
-#else	/* X86_32_LAZY_GS */
-#define get_user_gs(regs)	(u16)((regs)->gs)
-#define set_user_gs(regs, v)	do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk)	(task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v)		do { } while (0)
-#define lazy_load_gs(v)		do { } while (0)
-#endif	/* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+#  define get_user_gs(regs)		(u16)({ unsigned long v; savesegment(gs, v); v; })
+#  define set_user_gs(regs, v)		loadsegment(gs, (unsigned long)(v))
+#  define task_user_gs(tsk)		((tsk)->thread.gs)
+#  define lazy_save_gs(v)		savesegment(gs, (v))
+#  define lazy_load_gs(v)		loadsegment(gs, (v))
+# else	/* X86_32_LAZY_GS */
+#  define get_user_gs(regs)		(u16)((regs)->gs)
+#  define set_user_gs(regs, v)		do { (regs)->gs = (v); } while (0)
+#  define task_user_gs(tsk)		(task_pt_regs(tsk)->gs)
+#  define lazy_save_gs(v)		do { } while (0)
+#  define lazy_load_gs(v)		do { } while (0)
+# endif	/* X86_32_LAZY_GS */
 #endif	/* X86_32 */
 
-static inline unsigned long get_limit(unsigned long segment)
-{
-	unsigned long __limit;
-	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
-	return __limit + 1;
-}
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b2..f69e06b 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@
  */
 extern struct boot_params boot_params;
 
+static inline bool kaslr_enabled(void)
+{
+	return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
 /*
  * Do NOT EVER look at the BIOS memory size location.
  * It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e..6fe6b18 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@
 	unsigned long ip;
 	unsigned long flags;
 	unsigned short cs;
-	unsigned short gs;
-	unsigned short fs;
-	unsigned short __pad0;
+	unsigned short __pad2;	/* Was called gs, but was always zero. */
+	unsigned short __pad1;	/* Was called fs, but was always zero. */
+	unsigned short ss;
 	unsigned long err;
 	unsigned long trapno;
 	unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a95816..89db467 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
 			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		       unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f..ba665eb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
 
 #ifdef CONFIG_X86_SMAP
 
-#define ASM_CLAC							\
-	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
-	662: __ASM_CLAC ;						\
-	.popsection ;							\
-	.pushsection .altinstructions, "a" ;				\
-	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
-	.popsection
+#define ASM_CLAC \
+	ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
 
-#define ASM_STAC							\
-	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
-	662: __ASM_STAC ;						\
-	.popsection ;							\
-	.pushsection .altinstructions, "a" ;				\
-	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;	\
-	.popsection
+#define ASM_STAC \
+	ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
 
 #else /* CONFIG_X86_SMAP */
 
@@ -61,20 +49,20 @@
 static __always_inline void clac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
-	alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+	alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
 static __always_inline void stac(void)
 {
 	/* Note: a barrier is implicit in alternative() */
-	alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+	alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
-	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+	ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
 #define ASM_STAC \
-	ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+	ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
 
 #else /* CONFIG_X86_SMAP */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3..81d02fc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -154,6 +154,7 @@
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6a4b00f..aeb4666e 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
 
 #ifdef __KERNEL__
 
+#include <asm/nops.h>
+
 static inline void native_clts(void)
 {
 	asm volatile("clts");
@@ -199,6 +201,28 @@
 		       "+m" (*(volatile char __force *)__p));
 }
 
+static inline void clwb(volatile void *__p)
+{
+	volatile struct { char x[64]; } *p = __p;
+
+	asm volatile(ALTERNATIVE_2(
+		".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+		".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+		X86_FEATURE_CLFLUSHOPT,
+		".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
+		X86_FEATURE_CLWB)
+		: [p] "+m" (*p)
+		: [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+	alternative(ASM_NOP7,
+		    ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+		    "sfence",
+		    X86_FEATURE_PCOMMIT);
+}
+
 #define nop() asm volatile ("nop")
 
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1d4e4f2..ea2dbe8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,6 +13,33 @@
 #include <asm/types.h>
 
 /*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack.  We do it because of a nasty
+ * 32-bit corner case.  On x86_32, the hardware stack frame is
+ * variable-length.  Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0.  On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault.  (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
+/*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
  * - this struct shares the supervisor stack pages
@@ -145,7 +172,6 @@
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN		(THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET	(5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@
 
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_read_stable(kernel_stack) +
-		      KERNEL_STACK_OFFSET - THREAD_SIZE);
-	return ti;
+	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@
 
 #else /* !__ASSEMBLY__ */
 
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
 #define GET_THREAD_INFO(reg) \
 	_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-	_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+	_ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ *   2 or 4 pages, and the bottom  of the kernel stack contains
+ *   the thread_info structure. So to access the thread_info very
+ *   quickly from assembly code we can calculate down from the
+ *   top of the kernel stack to the bottom, using constant,
+ *   build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
 
 #endif
 
@@ -236,6 +280,16 @@
 #endif
 	return false;
 }
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 12a26b9..f2f9b39 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -231,6 +231,6 @@
 }
 
 unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
 
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b098..ab456dc 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -15,6 +15,7 @@
 
 /* loadflags */
 #define LOADED_HIGH	(1<<0)
+#define KASLR_FLAG	(1<<1)
 #define QUIET_FLAG	(1<<5)
 #define KEEP_SEGMENTS	(1<<6)
 #define CAN_USE_HEAP	(1<<7)
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a..580aee3 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
@@ -41,15 +45,17 @@
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa..bc16115 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -57,9 +61,12 @@
 	unsigned long rdx;
 	unsigned long rsi;
 	unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long rip;
 	unsigned long cs;
 	unsigned long eflags;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d8b9f90..16dc4e8 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,9 +177,24 @@
 	__u64 rip;
 	__u64 eflags;		/* RFLAGS */
 	__u16 cs;
-	__u16 gs;
-	__u16 fs;
-	__u16 __pad0;
+
+	/*
+	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+	 * Linux saved and restored fs and gs in these slots.  This
+	 * was counterproductive, as fsbase and gsbase were never
+	 * saved, so arch_prctl was presumably unreliable.
+	 *
+	 * If these slots are ever needed for any other purpose, there
+	 * is some risk that very old 64-bit binaries could get
+	 * confused.  I doubt that many such binaries still work,
+	 * though, since the same patch in 2.5.64 also removed the
+	 * 64-bit set_thread_area syscall, so it appears that there is
+	 * no TLS API that works in both pre- and post-2.5.64 kernels.
+	 */
+	__u16 __pad2;		/* Was gs. */
+	__u16 __pad1;		/* Was fs. */
+
+	__u16 ss;
 	__u64 err;
 	__u64 trapno;
 	__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1d..1fe9218 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
+#define EXIT_REASON_RDTSCP              51
 #define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cdb1b70..c887cd9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -32,6 +32,7 @@
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_IA32_EMULATION)	+= syscall_32.o
 obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 703130f..aef6531 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -52,10 +52,25 @@
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, ...)				\
-do {							\
-	if (debug_alternative)				\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+#define DPRINTK(fmt, args...)						\
+do {									\
+	if (debug_alternative)						\
+		printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);	\
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...)				\
+do {									\
+	if (unlikely(debug_alternative)) {				\
+		int j;							\
+									\
+		if (!(len))						\
+			break;						\
+									\
+		printk(KERN_DEBUG fmt, ##args);				\
+		for (j = 0; j < (len) - 1; j++)				\
+			printk(KERN_CONT "%02hhx ", buf[j]);		\
+		printk(KERN_CONT "%02hhx\n", buf[j]);			\
+	}								\
 } while (0)
 
 /*
@@ -243,12 +258,89 @@
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that asymmetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1 or 4-byte displacement.
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+	return opcode == 0xeb || opcode == 0xe9;
+}
 
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+	u8 *next_rip, *tgt_rip;
+	s32 n_dspl, o_dspl;
+	int repl_len;
+
+	if (a->replacementlen != 5)
+		return;
+
+	o_dspl = *(s32 *)(insnbuf + 1);
+
+	/* next_rip of the replacement JMP */
+	next_rip = repl_insn + a->replacementlen;
+	/* target rip of the replacement JMP */
+	tgt_rip  = next_rip + o_dspl;
+	n_dspl = tgt_rip - orig_insn;
+
+	DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+	if (tgt_rip - orig_insn >= 0) {
+		if (n_dspl - 2 <= 127)
+			goto two_byte_jmp;
+		else
+			goto five_byte_jmp;
+	/* negative offset */
+	} else {
+		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+			goto two_byte_jmp;
+		else
+			goto five_byte_jmp;
+	}
+
+two_byte_jmp:
+	n_dspl -= 2;
+
+	insnbuf[0] = 0xeb;
+	insnbuf[1] = (s8)n_dspl;
+	add_nops(insnbuf + 2, 3);
+
+	repl_len = 2;
+	goto done;
+
+five_byte_jmp:
+	n_dspl -= 5;
+
+	insnbuf[0] = 0xe9;
+	*(s32 *)&insnbuf[1] = n_dspl;
+
+	repl_len = 5;
+
+done:
+
+	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+	if (instr[0] != 0x90)
+		return;
+
+	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+		   instr, a->instrlen - a->padlen, a->padlen);
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
 void __init_or_module apply_alternatives(struct alt_instr *start,
 					 struct alt_instr *end)
 {
@@ -256,10 +348,10 @@
 	u8 *instr, *replacement;
 	u8 insnbuf[MAX_PATCH_LEN];
 
-	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	DPRINTK("alt table %p -> %p", start, end);
 	/*
 	 * The scan order should be from start to end. A later scanned
-	 * alternative code can overwrite a previous scanned alternative code.
+	 * alternative code can overwrite previously scanned alternative code.
 	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
 	 * patch code.
 	 *
@@ -267,29 +359,54 @@
 	 * order.
 	 */
 	for (a = start; a < end; a++) {
+		int insnbuf_sz = 0;
+
 		instr = (u8 *)&a->instr_offset + a->instr_offset;
 		replacement = (u8 *)&a->repl_offset + a->repl_offset;
-		BUG_ON(a->replacementlen > a->instrlen);
 		BUG_ON(a->instrlen > sizeof(insnbuf));
 		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-		if (!boot_cpu_has(a->cpuid))
+		if (!boot_cpu_has(a->cpuid)) {
+			if (a->padlen > 1)
+				optimize_nops(a, instr);
+
 			continue;
+		}
+
+		DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+			a->cpuid >> 5,
+			a->cpuid & 0x1f,
+			instr, a->instrlen,
+			replacement, a->replacementlen, a->padlen);
+
+		DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+		DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
 
 		memcpy(insnbuf, replacement, a->replacementlen);
+		insnbuf_sz = a->replacementlen;
 
 		/* 0xe8 is a relative jump; fix the offset. */
-		if (*insnbuf == 0xe8 && a->replacementlen == 5)
-		    *(s32 *)(insnbuf + 1) += replacement - instr;
+		if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+			*(s32 *)(insnbuf + 1) += replacement - instr;
+			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+				*(s32 *)(insnbuf + 1),
+				(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+		}
 
-		add_nops(insnbuf + a->replacementlen,
-			 a->instrlen - a->replacementlen);
+		if (a->replacementlen && is_jmp(replacement[0]))
+			recompute_jump(a, instr, replacement, insnbuf);
 
-		text_poke_early(instr, insnbuf, a->instrlen);
+		if (a->instrlen > a->replacementlen) {
+			add_nops(insnbuf + a->replacementlen,
+				 a->instrlen - a->replacementlen);
+			insnbuf_sz += a->instrlen - a->replacementlen;
+		}
+		DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
+
+		text_poke_early(instr, insnbuf, insnbuf_sz);
 	}
 }
 
 #ifdef CONFIG_SMP
-
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
 				  u8 *text, u8 *text_end)
 {
@@ -371,8 +488,8 @@
 	smp->locks_end	= locks_end;
 	smp->text	= text;
 	smp->text_end	= text_end;
-	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-		__func__, smp->locks, smp->locks_end,
+	DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
+		smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
 	list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +557,7 @@
 
 	return 0;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -601,7 +718,7 @@
 	if (likely(!bp_patching_in_progress))
 		return 0;
 
-	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+	if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
 		return 0;
 
 	/* set up the specified breakpoint handler */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad3639a..dcb5285 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1084,67 +1084,6 @@
 	local_irq_restore(flags);
 }
 
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-	unsigned int reg0, reg1;
-
-	/*
-	 * The version register is read-only in a real APIC.
-	 */
-	reg0 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-	reg1 = apic_read(APIC_LVR);
-	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-	/*
-	 * The two version reads above should print the same
-	 * numbers.  If the second one is different, then we
-	 * poke at a non-APIC.
-	 */
-	if (reg1 != reg0)
-		return 0;
-
-	/*
-	 * Check if the version looks reasonably.
-	 */
-	reg1 = GET_APIC_VERSION(reg0);
-	if (reg1 == 0x00 || reg1 == 0xff)
-		return 0;
-	reg1 = lapic_get_maxlvt();
-	if (reg1 < 0x02 || reg1 == 0xff)
-		return 0;
-
-	/*
-	 * The ID register is read/write in a real APIC.
-	 */
-	reg0 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
-	reg1 = apic_read(APIC_ID);
-	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
-	if (reg1 != (reg0 ^ apic->apic_id_mask))
-		return 0;
-
-	/*
-	 * The next two are just to see if we have sane values.
-	 * They're only really relevant if we're in Virtual Wire
-	 * compatibility mode, but most boxes are anymore.
-	 */
-	reg0 = apic_read(APIC_LVT0);
-	apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-	reg1 = apic_read(APIC_LVT1);
-	apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-	return 1;
-}
-
 /**
  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
  */
@@ -2283,7 +2222,6 @@
 		disable_ioapic_support();
 
 	default_setup_apic_routing();
-	verify_local_APIC();
 	apic_bsp_setup(true);
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e658f21..d9d0bd2 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -135,12 +135,12 @@
 
 	per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
 
-	__cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+	cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
 	for_each_online_cpu(cpu) {
 		if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
 			continue;
-		__cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
-		__cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+		cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+		cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
 	}
 }
 
@@ -195,7 +195,7 @@
 
 	BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
 
-	__cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
 	register_hotcpu_notifier(&x2apic_cpu_notifier);
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8e9dcfd..c8d9295 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -144,33 +144,60 @@
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	int pnodeid, is_uv1, is_uv2, is_uv3;
+	int pnodeid;
+	int uv_apic;
 
-	is_uv1 = !strcmp(oem_id, "SGI");
-	is_uv2 = !strcmp(oem_id, "SGI2");
-	is_uv3 = !strncmp(oem_id, "SGI3", 4);	/* there are varieties of UV3 */
-	if (is_uv1 || is_uv2 || is_uv3) {
-		uv_hub_info->hub_revision =
-			(is_uv1 ? UV1_HUB_REVISION_BASE :
-			(is_uv2 ? UV2_HUB_REVISION_BASE :
-				  UV3_HUB_REVISION_BASE));
-		pnodeid = early_get_pnodeid();
-		early_get_apic_pnode_shift();
-		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
-		x86_platform.nmi_init = uv_nmi_init;
-		if (!strcmp(oem_table_id, "UVL"))
-			uv_system_type = UV_LEGACY_APIC;
-		else if (!strcmp(oem_table_id, "UVX"))
-			uv_system_type = UV_X2APIC;
-		else if (!strcmp(oem_table_id, "UVH")) {
-			__this_cpu_write(x2apic_extra_bits,
-				pnodeid << uvh_apicid.s.pnode_shift);
-			uv_system_type = UV_NON_UNIQUE_APIC;
-			uv_set_apicid_hibit();
-			return 1;
-		}
+	if (strncmp(oem_id, "SGI", 3) != 0)
+		return 0;
+
+	/*
+	 * Determine UV arch type.
+	 *   SGI: UV100/1000
+	 *   SGI2: UV2000/3000
+	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+	 */
+	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+	if (uv_hub_info->hub_revision == 0)
+		goto badbios;
+
+	pnodeid = early_get_pnodeid();
+	early_get_apic_pnode_shift();
+	x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
+	x86_platform.nmi_init = uv_nmi_init;
+
+	if (!strcmp(oem_table_id, "UVX")) {		/* most common */
+		uv_system_type = UV_X2APIC;
+		uv_apic = 0;
+
+	} else if (!strcmp(oem_table_id, "UVH")) {	/* only UV1 systems */
+		uv_system_type = UV_NON_UNIQUE_APIC;
+		__this_cpu_write(x2apic_extra_bits,
+			pnodeid << uvh_apicid.s.pnode_shift);
+		uv_set_apicid_hibit();
+		uv_apic = 1;
+
+	} else	if (!strcmp(oem_table_id, "UVL")) {	/* only used for */
+		uv_system_type = UV_LEGACY_APIC;	/* very small systems */
+		uv_apic = 0;
+
+	} else {
+		goto badbios;
 	}
-	return 0;
+
+	pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+		oem_id, oem_table_id, uv_system_type,
+		uv_min_hub_revision_id, uv_apic);
+
+	return uv_apic;
+
+badbios:
+	pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+	pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+	BUG();
 }
 
 enum uv_system_type get_uv_system_type(void)
@@ -854,10 +881,14 @@
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask;
 	unsigned char n_lshift;
-	char *hub = (is_uv1_hub() ? "UV1" :
-		    (is_uv2_hub() ? "UV2" :
-				    "UV3"));
+	char *hub = (is_uv1_hub() ? "UV100/1000" :
+		    (is_uv2_hub() ? "UV2000/3000" :
+		    (is_uv3_hub() ? "UV300" : NULL)));
 
+	if (!hub) {
+		pr_err("UV: Unknown/unsupported UV hub\n");
+		return;
+	}
 	pr_info("UV: Found %s hub\n", hub);
 	map_low_mmrs();
 
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 3b3b9d3..47703ae 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -68,7 +68,7 @@
 
 	/* Offset from the sysenter stack to tss.sp0 */
 	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-		 sizeof(struct tss_struct));
+	       offsetofend(struct tss_struct, SYSENTER_stack));
 
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index fdcbb4d..5ce6f2d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -81,6 +81,7 @@
 #undef ENTRY
 
 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a220239..fd470eb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -488,6 +489,9 @@
 
 		va_align.mask	  = (upperbit - 1) & PAGE_MASK;
 		va_align.flags    = ALIGN_VA_32 | ALIGN_VA_64;
+
+		/* A random value per boot for bit slice [12:upper_bit) */
+		va_align.bits = get_random_int() & va_align.mask;
 	}
 }
 
@@ -711,6 +715,11 @@
 		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+	/* 3DNow or LM implies PREFETCHW */
+	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2346c95..3f70538 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -959,38 +959,37 @@
 #endif
 }
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
-	/* Load these always in case some future AMD CPU supports
-	   SYSENTER from compat mode too. */
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif		/* CONFIG_IA32_EMULATION */
-#endif		/* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
 #ifdef CONFIG_X86_32
 void enable_sep_cpu(void)
 {
-	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss;
+	int cpu;
 
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		put_cpu();
-		return;
-	}
+	cpu = get_cpu();
+	tss = &per_cpu(cpu_tss, cpu);
+
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		goto out;
+
+	/*
+	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+	 * see the big comment in struct x86_hw_tss's definition.
+	 */
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
-	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
-	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+	wrmsr(MSR_IA32_SYSENTER_ESP,
+	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+	      0);
+
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
 	put_cpu();
 }
 #endif
@@ -1118,7 +1117,7 @@
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
@@ -1130,8 +1129,8 @@
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ -1171,10 +1170,23 @@
 	 */
 	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
 	wrmsrl(MSR_LSTAR, system_call);
-	wrmsrl(MSR_CSTAR, ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION
-	syscall32_cpu_init();
+	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	/*
+	 * This only works on Intel CPUs.
+	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+	 * This does not cause SYSENTER to jump to the wrong location, because
+	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+	 */
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+	wrmsrl(MSR_CSTAR, ignore_sysret);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
 	/* Flags to clear on syscall */
@@ -1226,6 +1238,15 @@
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -1307,7 +1328,7 @@
 	 */
 	load_ucode_ap();
 
-	t = &per_cpu(init_tss, cpu);
+	t = &per_cpu(cpu_tss, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1391,7 +1412,7 @@
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
 	wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6596433..edcb0e2 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -7,16 +7,14 @@
  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  */
 
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/sysfs.h>
 #include <linux/pci.h>
 
 #include <asm/processor.h>
-#include <linux/smp.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
@@ -116,10 +114,10 @@
 
 
 enum _cache_type {
-	CACHE_TYPE_NULL	= 0,
-	CACHE_TYPE_DATA = 1,
-	CACHE_TYPE_INST = 2,
-	CACHE_TYPE_UNIFIED = 3
+	CTYPE_NULL = 0,
+	CTYPE_DATA = 1,
+	CTYPE_INST = 2,
+	CTYPE_UNIFIED = 3
 };
 
 union _cpuid4_leaf_eax {
@@ -159,11 +157,6 @@
 	struct amd_northbridge *nb;
 };
 
-struct _cpuid4_info {
-	struct _cpuid4_info_regs base;
-	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
-};
-
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -220,6 +213,13 @@
 static const unsigned char levels[] = { 1, 1, 2, 3 };
 static const unsigned char types[] = { 1, 2, 3, 3 };
 
+static const enum cache_type cache_type_map[] = {
+	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
+	[CTYPE_DATA] = CACHE_TYPE_DATA,
+	[CTYPE_INST] = CACHE_TYPE_INST,
+	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
 static void
 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		     union _cpuid4_leaf_ebx *ebx,
@@ -291,14 +291,8 @@
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
-struct _cache_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
-	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
-			 unsigned int);
-};
-
 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
+
 /*
  * L3 cache descriptors
  */
@@ -325,20 +319,6 @@
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
-{
-	int node;
-
-	/* only for L3, and not in virtualized environments */
-	if (index < 3)
-		return;
-
-	node = amd_get_nb_id(smp_processor_id());
-	this_leaf->nb = node_to_amd_nb(node);
-	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
-		amd_calc_l3_indices(this_leaf->nb);
-}
-
 /*
  * check whether a slot used for disabling an L3 index is occupied.
  * @l3: L3 cache descriptor
@@ -359,15 +339,13 @@
 	return -1;
 }
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 				  unsigned int slot)
 {
 	int index;
+	struct amd_northbridge *nb = this_leaf->priv;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		return -EINVAL;
-
-	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
+	index = amd_get_l3_disable_slot(nb, slot);
 	if (index >= 0)
 		return sprintf(buf, "%d\n", index);
 
@@ -376,9 +354,10 @@
 
 #define SHOW_CACHE_DISABLE(slot)					\
 static ssize_t								\
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
-			  unsigned int cpu)				\
+cache_disable_##slot##_show(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
 {									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 	return show_cache_disable(this_leaf, buf, slot);		\
 }
 SHOW_CACHE_DISABLE(0)
@@ -446,25 +425,23 @@
 	return 0;
 }
 
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-				  const char *buf, size_t count,
-				  unsigned int slot)
+static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
+				   const char *buf, size_t count,
+				   unsigned int slot)
 {
 	unsigned long val = 0;
 	int cpu, err = 0;
+	struct amd_northbridge *nb = this_leaf->priv;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		return -EINVAL;
-
-	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
 	if (kstrtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
-	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
+	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 	if (err) {
 		if (err == -EEXIST)
 			pr_warning("L3 slot %d in use/index already disabled!\n",
@@ -476,41 +453,36 @@
 
 #define STORE_CACHE_DISABLE(slot)					\
 static ssize_t								\
-store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
-			   const char *buf, size_t count,		\
-			   unsigned int cpu)				\
+cache_disable_##slot##_store(struct device *dev,			\
+			     struct device_attribute *attr,		\
+			     const char *buf, size_t count)		\
 {									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 	return store_cache_disable(this_leaf, buf, count, slot);	\
 }
 STORE_CACHE_DISABLE(0)
 STORE_CACHE_DISABLE(1)
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-		show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-		show_cache_disable_1, store_cache_disable_1);
-
-static ssize_t
-show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+static ssize_t subcaches_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		return -EINVAL;
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 }
 
-static ssize_t
-store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
-		unsigned int cpu)
+static ssize_t subcaches_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 	unsigned long val;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		return -EINVAL;
-
 	if (kstrtoul(buf, 16, &val) < 0)
 		return -EINVAL;
 
@@ -520,9 +492,92 @@
 	return count;
 }
 
-static struct _cache_attr subcaches =
-	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+static DEVICE_ATTR_RW(cache_disable_0);
+static DEVICE_ATTR_RW(cache_disable_1);
+static DEVICE_ATTR_RW(subcaches);
 
+static umode_t
+cache_private_attrs_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+
+	if (!this_leaf->priv)
+		return 0;
+
+	if ((attr == &dev_attr_subcaches.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		return mode;
+
+	if ((attr == &dev_attr_cache_disable_0.attr ||
+	     attr == &dev_attr_cache_disable_1.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		return mode;
+
+	return 0;
+}
+
+static struct attribute_group cache_private_group = {
+	.is_visible = cache_private_attrs_is_visible,
+};
+
+static void init_amd_l3_attrs(void)
+{
+	int n = 1;
+	static struct attribute **amd_l3_attrs;
+
+	if (amd_l3_attrs) /* already initialized */
+		return;
+
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		n += 2;
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		n += 1;
+
+	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
+	if (!amd_l3_attrs)
+		return;
+
+	n = 0;
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
+	}
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
+
+	cache_private_group.attrs = amd_l3_attrs;
+}
+
+const struct attribute_group *
+cache_get_priv_group(struct cacheinfo *this_leaf)
+{
+	struct amd_northbridge *nb = this_leaf->priv;
+
+	if (this_leaf->level < 3 || !nb)
+		return NULL;
+
+	if (nb && nb->l3_cache.indices)
+		init_amd_l3_attrs();
+
+	return &cache_private_group;
+}
+
+static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+{
+	int node;
+
+	/* only for L3, and not in virtualized environments */
+	if (index < 3)
+		return;
+
+	node = amd_get_nb_id(smp_processor_id());
+	this_leaf->nb = node_to_amd_nb(node);
+	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
+		amd_calc_l3_indices(this_leaf->nb);
+}
 #else
 #define amd_init_l3_cache(x, y)
 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
@@ -546,7 +601,7 @@
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 	}
 
-	if (eax.split.type == CACHE_TYPE_NULL)
+	if (eax.split.type == CTYPE_NULL)
 		return -EIO; /* better error ? */
 
 	this_leaf->eax = eax;
@@ -575,7 +630,7 @@
 		/* Do cpuid(op) loop to find out num_cache_leaves */
 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 		cache_eax.full = eax;
-	} while (cache_eax.split.type != CACHE_TYPE_NULL);
+	} while (cache_eax.split.type != CTYPE_NULL);
 	return i;
 }
 
@@ -626,9 +681,9 @@
 
 			switch (this_leaf.eax.split.level) {
 			case 1:
-				if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+				if (this_leaf.eax.split.type == CTYPE_DATA)
 					new_l1d = this_leaf.size/1024;
-				else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+				else if (this_leaf.eax.split.type == CTYPE_INST)
 					new_l1i = this_leaf.size/1024;
 				break;
 			case 2:
@@ -747,55 +802,52 @@
 	return l2;
 }
 
-#ifdef CONFIG_SYSFS
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
-
-#ifdef CONFIG_SMP
-
-static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
+static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
+				    struct _cpuid4_info_regs *base)
 {
-	struct _cpuid4_info *this_leaf;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf;
 	int i, sibling;
 
 	if (cpu_has_topoext) {
 		unsigned int apicid, nshared, first, last;
 
-		if (!per_cpu(ici_cpuid4_info, cpu))
-			return 0;
-
-		this_leaf = CPUID4_INFO_IDX(cpu, index);
-		nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+		this_leaf = this_cpu_ci->info_list + index;
+		nshared = base->eax.split.num_threads_sharing + 1;
 		apicid = cpu_data(cpu).apicid;
 		first = apicid - (apicid % nshared);
 		last = first + nshared - 1;
 
 		for_each_online_cpu(i) {
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
+				continue;
+
 			apicid = cpu_data(i).apicid;
 			if ((apicid < first) || (apicid > last))
 				continue;
-			if (!per_cpu(ici_cpuid4_info, i))
-				continue;
-			this_leaf = CPUID4_INFO_IDX(i, index);
+
+			this_leaf = this_cpu_ci->info_list + index;
 
 			for_each_online_cpu(sibling) {
 				apicid = cpu_data(sibling).apicid;
 				if ((apicid < first) || (apicid > last))
 					continue;
-				set_bit(sibling, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
 			}
 		}
 	} else if (index == 3) {
 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
-			if (!per_cpu(ici_cpuid4_info, i))
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
 				continue;
-			this_leaf = CPUID4_INFO_IDX(i, index);
+			this_leaf = this_cpu_ci->info_list + index;
 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 				if (!cpu_online(sibling))
 					continue;
-				set_bit(sibling, this_leaf->shared_cpu_map);
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
 			}
 		}
 	} else
@@ -804,457 +856,86 @@
 	return 1;
 }
 
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
+static void __cache_cpumap_setup(unsigned int cpu, int index,
+				 struct _cpuid4_info_regs *base)
 {
-	struct _cpuid4_info *this_leaf, *sibling_leaf;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
 	int index_msb, i;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (cache_shared_amd_cpu_map_setup(cpu, index))
+		if (__cache_amd_cpumap_setup(cpu, index, base))
 			return;
 	}
 
-	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
+	this_leaf = this_cpu_ci->info_list + index;
+	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 	if (num_threads_sharing == 1)
-		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
-	else {
-		index_msb = get_count_order(num_threads_sharing);
+		return;
 
-		for_each_online_cpu(i) {
-			if (cpu_data(i).apicid >> index_msb ==
-			    c->apicid >> index_msb) {
-				cpumask_set_cpu(i,
-					to_cpumask(this_leaf->shared_cpu_map));
-				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
-					sibling_leaf =
-						CPUID4_INFO_IDX(i, index);
-					cpumask_set_cpu(cpu, to_cpumask(
-						sibling_leaf->shared_cpu_map));
-				}
-			}
+	index_msb = get_count_order(num_threads_sharing);
+
+	for_each_online_cpu(i)
+		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
+			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
+
+			if (i == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
+			sibling_leaf = sib_cpu_ci->info_list + index;
+			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 		}
-	}
-}
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-	struct _cpuid4_info	*this_leaf, *sibling_leaf;
-	int sibling;
-
-	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
-		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
-		cpumask_clear_cpu(cpu,
-				  to_cpumask(sibling_leaf->shared_cpu_map));
-	}
-}
-#else
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
 }
 
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+			 struct _cpuid4_info_regs *base)
 {
-}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
-	int i;
-
-	for (i = 0; i < num_cache_leaves; i++)
-		cache_remove_shared_cpu_map(cpu, i);
-
-	kfree(per_cpu(ici_cpuid4_info, cpu));
-	per_cpu(ici_cpuid4_info, cpu) = NULL;
+	this_leaf->level = base->eax.split.level;
+	this_leaf->type = cache_type_map[base->eax.split.type];
+	this_leaf->coherency_line_size =
+				base->ebx.split.coherency_line_size + 1;
+	this_leaf->ways_of_associativity =
+				base->ebx.split.ways_of_associativity + 1;
+	this_leaf->size = base->size;
+	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
+	this_leaf->physical_line_partition =
+				base->ebx.split.physical_line_partition + 1;
+	this_leaf->priv = base->nb;
 }
 
-static void get_cpu_leaves(void *_retval)
+static int __init_cache_level(unsigned int cpu)
 {
-	int j, *retval = _retval, cpu = smp_processor_id();
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
-	/* Do cpuid and store the results */
-	for (j = 0; j < num_cache_leaves; j++) {
-		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
-
-		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
-		if (unlikely(*retval < 0)) {
-			int i;
-
-			for (i = 0; i < j; i++)
-				cache_remove_shared_cpu_map(cpu, i);
-			break;
-		}
-		cache_shared_cpu_map_setup(cpu, j);
-	}
-}
-
-static int detect_cache_attributes(unsigned int cpu)
-{
-	int			retval;
-
-	if (num_cache_leaves == 0)
+	if (!num_cache_leaves)
 		return -ENOENT;
-
-	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
-	if (retval) {
-		kfree(per_cpu(ici_cpuid4_info, cpu));
-		per_cpu(ici_cpuid4_info, cpu) = NULL;
-	}
-
-	return retval;
-}
-
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-#include <linux/cpu.h>
-
-/* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
-
-struct _index_kobject {
-	struct kobject kobj;
-	unsigned int cpu;
-	unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
-
-#define show_one_plus(file_name, object, val)				\
-static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
-				unsigned int cpu)			\
-{									\
-	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, base.eax.split.level, 0);
-show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
-			 unsigned int cpu)
-{
-	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
-}
-
-static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
-					int type, char *buf)
-{
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int ret;
-
-	if (type)
-		ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
-				cpumask_pr_args(mask));
-	else
-		ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb",
-				cpumask_pr_args(mask));
-	buf[ret++] = '\n';
-	buf[ret] = '\0';
-	return ret;
-}
-
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
-					  unsigned int cpu)
-{
-	return show_shared_cpu_map_func(leaf, 0, buf);
-}
-
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
-					   unsigned int cpu)
-{
-	return show_shared_cpu_map_func(leaf, 1, buf);
-}
-
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
-			 unsigned int cpu)
-{
-	switch (this_leaf->base.eax.split.type) {
-	case CACHE_TYPE_DATA:
-		return sprintf(buf, "Data\n");
-	case CACHE_TYPE_INST:
-		return sprintf(buf, "Instruction\n");
-	case CACHE_TYPE_UNIFIED:
-		return sprintf(buf, "Unified\n");
-	default:
-		return sprintf(buf, "Unknown\n");
-	}
-}
-
-#define to_object(k)	container_of(k, struct _index_kobject, kobj)
-#define to_attr(a)	container_of(a, struct _cache_attr, attr)
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
-	__ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-define_one_ro(shared_cpu_list);
-
-static struct attribute *default_attrs[] = {
-	&type.attr,
-	&level.attr,
-	&coherency_line_size.attr,
-	&physical_line_partition.attr,
-	&ways_of_associativity.attr,
-	&number_of_sets.attr,
-	&size.attr,
-	&shared_cpu_map.attr,
-	&shared_cpu_list.attr,
-	NULL
-};
-
-#ifdef CONFIG_AMD_NB
-static struct attribute **amd_l3_attrs(void)
-{
-	static struct attribute **attrs;
-	int n;
-
-	if (attrs)
-		return attrs;
-
-	n = ARRAY_SIZE(default_attrs);
-
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		n += 2;
-
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		n += 1;
-
-	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
-	if (attrs == NULL)
-		return attrs = default_attrs;
-
-	for (n = 0; default_attrs[n]; n++)
-		attrs[n] = default_attrs[n];
-
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
-		attrs[n++] = &cache_disable_0.attr;
-		attrs[n++] = &cache_disable_1.attr;
-	}
-
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		attrs[n++] = &subcaches.attr;
-
-	return attrs;
-}
-#endif
-
-static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	struct _cache_attr *fattr = to_attr(attr);
-	struct _index_kobject *this_leaf = to_object(kobj);
-	ssize_t ret;
-
-	ret = fattr->show ?
-		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-			buf, this_leaf->cpu) :
-		0;
-	return ret;
-}
-
-static ssize_t store(struct kobject *kobj, struct attribute *attr,
-		     const char *buf, size_t count)
-{
-	struct _cache_attr *fattr = to_attr(attr);
-	struct _index_kobject *this_leaf = to_object(kobj);
-	ssize_t ret;
-
-	ret = fattr->store ?
-		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-			buf, count, this_leaf->cpu) :
-		0;
-	return ret;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show   = show,
-	.store  = store,
-};
-
-static struct kobj_type ktype_cache = {
-	.sysfs_ops	= &sysfs_ops,
-	.default_attrs	= default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
-	kfree(per_cpu(ici_cache_kobject, cpu));
-	kfree(per_cpu(ici_index_kobject, cpu));
-	per_cpu(ici_cache_kobject, cpu) = NULL;
-	per_cpu(ici_index_kobject, cpu) = NULL;
-	free_cache_attributes(cpu);
-}
-
-static int cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-	int err;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	err = detect_cache_attributes(cpu);
-	if (err)
-		return err;
-
-	/* Allocate all required memory */
-	per_cpu(ici_cache_kobject, cpu) =
-		kzalloc(sizeof(struct kobject), GFP_KERNEL);
-	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
-		goto err_out;
-
-	per_cpu(ici_index_kobject, cpu) = kzalloc(
-	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
-	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
-		goto err_out;
-
-	return 0;
-
-err_out:
-	cpuid4_cache_sysfs_exit(cpu);
-	return -ENOMEM;
-}
-
-static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
-
-/* Add/Remove cache interface for CPU device */
-static int cache_add_dev(struct device *dev)
-{
-	unsigned int cpu = dev->id;
-	unsigned long i, j;
-	struct _index_kobject *this_object;
-	struct _cpuid4_info   *this_leaf;
-	int retval;
-
-	retval = cpuid4_cache_sysfs_init(cpu);
-	if (unlikely(retval < 0))
-		return retval;
-
-	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
-				      &ktype_percpu_entry,
-				      &dev->kobj, "%s", "cache");
-	if (retval < 0) {
-		cpuid4_cache_sysfs_exit(cpu);
-		return retval;
-	}
-
-	for (i = 0; i < num_cache_leaves; i++) {
-		this_object = INDEX_KOBJECT_PTR(cpu, i);
-		this_object->cpu = cpu;
-		this_object->index = i;
-
-		this_leaf = CPUID4_INFO_IDX(cpu, i);
-
-		ktype_cache.default_attrs = default_attrs;
-#ifdef CONFIG_AMD_NB
-		if (this_leaf->base.nb)
-			ktype_cache.default_attrs = amd_l3_attrs();
-#endif
-		retval = kobject_init_and_add(&(this_object->kobj),
-					      &ktype_cache,
-					      per_cpu(ici_cache_kobject, cpu),
-					      "index%1lu", i);
-		if (unlikely(retval)) {
-			for (j = 0; j < i; j++)
-				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
-			kobject_put(per_cpu(ici_cache_kobject, cpu));
-			cpuid4_cache_sysfs_exit(cpu);
-			return retval;
-		}
-		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
-	}
-	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
-
-	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
+	if (!this_cpu_ci)
+		return -EINVAL;
+	this_cpu_ci->num_levels = 3;
+	this_cpu_ci->num_leaves = num_cache_leaves;
 	return 0;
 }
 
-static void cache_remove_dev(struct device *dev)
+static int __populate_cache_leaves(unsigned int cpu)
 {
-	unsigned int cpu = dev->id;
-	unsigned long i;
+	unsigned int idx, ret;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	struct _cpuid4_info_regs id4_regs = {};
 
-	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-		return;
-	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
-		return;
-	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
-
-	for (i = 0; i < num_cache_leaves; i++)
-		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
-	kobject_put(per_cpu(ici_cache_kobject, cpu));
-	cpuid4_cache_sysfs_exit(cpu);
-}
-
-static int cacheinfo_cpu_callback(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		cache_add_dev(dev);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		cache_remove_dev(dev);
-		break;
+	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
+		if (ret)
+			return ret;
+		ci_leaf_init(this_leaf++, &id4_regs);
+		__cache_cpumap_setup(cpu, idx, &id4_regs);
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block cacheinfo_cpu_notifier = {
-	.notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __init cache_sysfs_init(void)
-{
-	int i, err = 0;
-
-	if (num_cache_leaves == 0)
-		return 0;
-
-	cpu_notifier_register_begin();
-	for_each_online_cpu(i) {
-		struct device *dev = get_cpu_device(i);
-
-		err = cache_add_dev(dev);
-		if (err)
-			goto out;
-	}
-	__register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
-out:
-	cpu_notifier_register_done();
-	return err;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
+DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
+DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 10b4690..fe32074 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -14,6 +14,7 @@
 };
 
 #define ATTR_LEN		16
+#define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -23,20 +24,20 @@
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8bb4330..9c682c2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -186,7 +186,61 @@
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
+/*
+ * See AMD Error Scope Hierarchy table in a newer BKDG. For example
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
+ */
+static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+{
+	enum context ctx = error_context(m);
+
+	/* Processor Context Corrupt, no need to fumble too much, die! */
+	if (m->status & MCI_STATUS_PCC)
+		return MCE_PANIC_SEVERITY;
+
+	if (m->status & MCI_STATUS_UC) {
+
+		/*
+		 * On older systems where overflow_recov flag is not present, we
+		 * should simply panic if an error overflow occurs. If
+		 * overflow_recov flag is present and set, then software can try
+		 * to at least kill process to prolong system operation.
+		 */
+		if (mce_flags.overflow_recov) {
+			/* software can try to contain */
+			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
+				return MCE_PANIC_SEVERITY;
+
+			/* kill current process */
+			return MCE_AR_SEVERITY;
+		} else {
+			/* at least one error was not logged */
+			if (m->status & MCI_STATUS_OVER)
+				return MCE_PANIC_SEVERITY;
+		}
+
+		/*
+		 * For any other case, return MCE_UC_SEVERITY so that we log the
+		 * error and exit #MC handler.
+		 */
+		return MCE_UC_SEVERITY;
+	}
+
+	/*
+	 * deferred error: poll handler catches these and adds to mce_ring so
+	 * memory-failure can take recovery actions.
+	 */
+	if (m->status & MCI_STATUS_DEFERRED)
+		return MCE_DEFERRED_SEVERITY;
+
+	/*
+	 * corrected error: poll handler catches these and passes responsibility
+	 * of decoding the error to EDAC
+	 */
+	return MCE_KEEP_SEVERITY;
+}
+
+static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
@@ -216,6 +270,16 @@
 	}
 }
 
+/* Default to mce_severity_intel */
+int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+		    mce_severity_intel;
+
+void __init mcheck_vendor_init_severity(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		mce_severity = mce_severity_amd;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3c036cb..e535533 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,11 +60,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100	/* 100ns */
+#define SPINUNIT		100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
 	.bootlog  = -1,
@@ -89,9 +90,6 @@
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -622,8 +620,9 @@
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -646,7 +645,7 @@
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -679,8 +678,10 @@
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -694,6 +695,8 @@
 	 */
 
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -813,7 +816,7 @@
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Fatal Machine check", m, msg);
+		mce_panic("Fatal machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -826,7 +829,7 @@
 	 * source or one CPU is hung. Panic.
 	 */
 	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Machine check from unknown source", NULL, NULL);
+		mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
 	/*
 	 * Now clear all the mces_seen so that they don't reappear on
@@ -1258,7 +1261,7 @@
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1268,59 +1271,14 @@
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
-
-	return test_and_clear_bit(0, v);
-}
-
-static void mce_timer_fn(unsigned long data)
-{
-	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long iv;
-	int notify;
-
-	WARN_ON(smp_processor_id() != data);
-
-	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
-	}
-
-	/*
-	 * Alert userspace if needed.  If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
-	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
-		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
-		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
-	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
-}
-
-/*
- * Ensure that the timer is firing in @interval from now.
- */
-void mce_timer_kick(unsigned long interval)
-{
-	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	unsigned long when = jiffies + interval;
-	unsigned long iv = __this_cpu_read(mce_next_interval);
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	if (timer_pending(t)) {
 		if (time_before(when, t->expires))
@@ -1329,6 +1287,53 @@
 		t->expires = round_jiffies(when);
 		add_timer_on(t, smp_processor_id());
 	}
+
+	local_irq_restore(flags);
+}
+
+static void mce_timer_fn(unsigned long data)
+{
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
+	unsigned long iv;
+
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
+
+	if (mce_available(this_cpu_ptr(&cpu_info))) {
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
+	}
+
+	/*
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
+	 */
+	if (mce_notify_irq())
+		iv = max(iv / 2, (unsigned long) HZ/100);
+	else
+		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+
+done:
+	__this_cpu_write(mce_next_interval, iv);
+	__restart_timer(t, iv);
+}
+
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1525,45 +1530,46 @@
 		 * Various K7s with broken bank 0 around. Always disable
 		 * by default.
 		 */
-		 if (c->x86 == 6 && cfg->banks > 0)
+		if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
-		 /*
-		  * Turn off MC4_MISC thresholding banks on those models since
-		  * they're not supported there.
-		  */
-		 if (c->x86 == 0x15 &&
-		     (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-			 int i;
-			 u64 val, hwcr;
-			 bool need_toggle;
-			 u32 msrs[] = {
+		/*
+		 * overflow_recov is supported for F15h Models 00h-0fh
+		 * even though we don't have a CPUID bit for it.
+		 */
+		if (c->x86 == 0x15 && c->x86_model <= 0xf)
+			mce_flags.overflow_recov = 1;
+
+		/*
+		 * Turn off MC4_MISC thresholding banks on those models since
+		 * they're not supported there.
+		 */
+		if (c->x86 == 0x15 &&
+		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+			int i;
+			u64 hwcr;
+			bool need_toggle;
+			u32 msrs[] = {
 				0x00000413, /* MC4_MISC0 */
 				0xc0000408, /* MC4_MISC1 */
-			 };
+			};
 
-			 rdmsrl(MSR_K7_HWCR, hwcr);
+			rdmsrl(MSR_K7_HWCR, hwcr);
 
-			 /* McStatusWrEn has to be set */
-			 need_toggle = !(hwcr & BIT(18));
+			/* McStatusWrEn has to be set */
+			need_toggle = !(hwcr & BIT(18));
 
-			 if (need_toggle)
-				 wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 
-			 for (i = 0; i < ARRAY_SIZE(msrs); i++) {
-				 rdmsrl(msrs[i], val);
+			/* Clear CntP bit safely */
+			for (i = 0; i < ARRAY_SIZE(msrs); i++)
+				msr_clear_bit(msrs[i], 62);
 
-				 /* CntP bit set? */
-				 if (val & BIT_64(62)) {
-					val &= ~BIT_64(62);
-					wrmsrl(msrs[i], val);
-				 }
-			 }
-
-			 /* restore old settings */
-			 if (need_toggle)
-				 wrmsrl(MSR_K7_HWCR, hwcr);
-		 }
+			/* restore old settings */
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr);
+		}
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1629,10 +1635,11 @@
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
+		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
 		break;
 	default:
 		break;
@@ -2017,6 +2024,7 @@
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mcheck_vendor_init_severity();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f1c3769..55ad9b3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -79,7 +79,7 @@
 	return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
 	switch (b->address) {
 	/* MSR4_MISC0 */
@@ -250,6 +250,7 @@
 			if (!b.interrupt_capable)
 				goto init;
 
+			b.interrupt_enable = 1;
 			new	= (high & MASK_LVTOFF_HI) >> 20;
 			offset  = setup_APIC_mce(offset, new);
 
@@ -322,6 +323,8 @@
 log:
 	mce_setup(&m);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	if (!(m.status & MCI_STATUS_VAL))
+		return;
 	m.misc = ((u64)high << 32) | low;
 	m.bank = bank;
 	mce_log(&m);
@@ -497,10 +500,12 @@
 	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
 	b->threshold_limit	= THRESHOLD_MAX;
 
-	if (b->interrupt_capable)
+	if (b->interrupt_capable) {
 		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-	else
+		b->interrupt_enable = 1;
+	} else {
 		threshold_ktype.default_attrs[2] = NULL;
+	}
 
 	INIT_LIST_HEAD(&b->miscj);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b3c97ba..b4a41cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -39,6 +39,15 @@
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
+/*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
  */
@@ -46,7 +55,7 @@
 
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
-#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_INTERVAL	(HZ)
 #define CMCI_STORM_THRESHOLD	15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@
 	return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-		return;
-	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+		return false;
+
+	/*
+	 * Reset the counter if we've logged an error in the last poll
+	 * during the storm.
+	 */
+	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(cmci_backoff_cnt);
+
+	return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-	int r;
-
-	if (interval < CMCI_POLL_INTERVAL)
-		return interval;
+	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+		mce_notify_irq();
+		return CMCI_STORM_INTERVAL;
+	}
 
 	switch (__this_cpu_read(cmci_storm_state)) {
 	case CMCI_STORM_ACTIVE:
+
 		/*
 		 * We switch back to interrupt mode once the poll timer has
-		 * silenced itself. That means no events recorded and the
-		 * timer interval is back to our poll interval.
+		 * silenced itself. That means no events recorded and the timer
+		 * interval is back to our poll interval.
 		 */
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-		r = atomic_sub_return(1, &cmci_storm_on_cpus);
-		if (r == 0)
+		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
 		/* FALLTHROUGH */
 
 	case CMCI_STORM_SUBSIDED:
 		/*
-		 * We wait for all cpus to go back to SUBSIDED
-		 * state. When that happens we switch back to
-		 * interrupt mode.
+		 * We wait for all CPUs to go back to SUBSIDED state. When that
+		 * happens we switch back to interrupt mode.
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@
 		}
 		return CMCI_POLL_INTERVAL;
 	default:
-		/*
-		 * We have shiny weather. Let the poll do whatever it
-		 * thinks.
-		 */
+
+		/* We have shiny weather. Let the poll do whatever it thinks. */
 		return interval;
 	}
 }
@@ -178,7 +196,8 @@
 	cmci_storm_disable_banks();
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
-	mce_timer_kick(CMCI_POLL_INTERVAL);
+	mce_timer_kick(CMCI_STORM_INTERVAL);
+	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
 	if (r == 1)
 		pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@
 {
 	if (cmci_storm_detect())
 		return;
+
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	mce_notify_irq();
 }
@@ -286,6 +306,7 @@
 
 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
+
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index bfbbe61..12829c3 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -21,7 +21,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/firmware.h>
-#include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index d45df4bd..a413a69 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -23,57 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cmdline.h>
 
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
-		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
- *
- * x86_vendor() gets vendor information directly through cpuid.
- */
-static int x86_vendor(void)
-{
-	u32 eax = 0x00000000;
-	u32 ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-		return X86_VENDOR_INTEL;
-
-	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-		return X86_VENDOR_AMD;
-
-	return X86_VENDOR_UNKNOWN;
-}
-
-static int x86_family(void)
-{
-	u32 eax = 0x00000001;
-	u32 ebx, ecx = 0, edx;
-	int x86;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	x86 = (eax >> 8) & 0xf;
-	if (x86 == 15)
-		x86 += (eax >> 20) & 0xff;
-
-	return x86;
-}
-
 static bool __init check_loader_disabled_bsp(void)
 {
 #ifdef CONFIG_X86_32
@@ -96,7 +45,7 @@
 
 void __init load_ucode_bsp(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	if (check_loader_disabled_bsp())
 		return;
@@ -105,15 +54,15 @@
 		return;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			load_ucode_intel_bsp();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			load_ucode_amd_bsp();
 		break;
 	default:
@@ -132,7 +81,7 @@
 
 void load_ucode_ap(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	if (check_loader_disabled_ap())
 		return;
@@ -141,15 +90,15 @@
 		return;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			load_ucode_intel_ap();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			load_ucode_amd_ap();
 		break;
 	default:
@@ -179,18 +128,18 @@
 
 void reload_early_microcode(void)
 {
-	int vendor, x86;
+	int vendor, family;
 
 	vendor = x86_vendor();
-	x86 = x86_family();
+	family = x86_family();
 
 	switch (vendor) {
 	case X86_VENDOR_INTEL:
-		if (x86 >= 6)
+		if (family >= 6)
 			reload_ucode_intel();
 		break;
 	case X86_VENDOR_AMD:
-		if (x86 >= 0x10)
+		if (family >= 0x10)
 			reload_ucode_amd();
 		break;
 	default:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 746e7fd..a41bead 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -124,7 +124,7 @@
 	cpf = cpu_sig.pf;
 	crev = cpu_sig.rev;
 
-	return get_matching_microcode(csig, cpf, mc_intel, crev);
+	return get_matching_microcode(csig, cpf, crev, mc_intel);
 }
 
 static int apply_microcode_intel(int cpu)
@@ -226,7 +226,7 @@
 
 		csig = uci->cpu_sig.sig;
 		cpf = uci->cpu_sig.pf;
-		if (get_matching_microcode(csig, cpf, mc, new_rev)) {
+		if (get_matching_microcode(csig, cpf, new_rev, mc)) {
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index 420eb93..2f49ab4 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -16,6 +16,14 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  */
+
+/*
+ * This needs to be before all headers so that pr_debug in printk.h doesn't turn
+ * printk calls into no_printk().
+ *
+ *#define DEBUG
+ */
+
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -28,6 +36,9 @@
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"microcode: " fmt
+
 static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
 static struct mc_saved_data {
 	unsigned int mc_saved_count;
@@ -35,50 +46,45 @@
 } mc_saved_data;
 
 static enum ucode_state
-generic_load_microcode_early(struct microcode_intel **mc_saved_p,
-			     unsigned int mc_saved_count,
-			     struct ucode_cpu_info *uci)
+load_microcode_early(struct microcode_intel **saved,
+		     unsigned int num_saved, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *ucode_ptr, *new_mc = NULL;
-	int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
-	unsigned int csig = uci->cpu_sig.sig;
-	unsigned int cpf = uci->cpu_sig.pf;
-	int i;
+	struct microcode_header_intel *mc_hdr;
+	int new_rev, ret, i;
 
-	for (i = 0; i < mc_saved_count; i++) {
-		ucode_ptr = mc_saved_p[i];
+	new_rev = uci->cpu_sig.rev;
 
-		mc_header = (struct microcode_header_intel *)ucode_ptr;
-		mc_size = get_totalsize(mc_header);
-		if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
-			new_rev = mc_header->rev;
-			new_mc  = ucode_ptr;
-		}
+	for (i = 0; i < num_saved; i++) {
+		ucode_ptr = saved[i];
+		mc_hdr	  = (struct microcode_header_intel *)ucode_ptr;
+
+		ret = get_matching_microcode(uci->cpu_sig.sig,
+					     uci->cpu_sig.pf,
+					     new_rev,
+					     ucode_ptr);
+		if (!ret)
+			continue;
+
+		new_rev = mc_hdr->rev;
+		new_mc  = ucode_ptr;
 	}
 
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto out;
-	}
+	if (!new_mc)
+		return UCODE_NFOUND;
 
 	uci->mc = (struct microcode_intel *)new_mc;
-out:
-	return state;
+	return UCODE_OK;
 }
 
-static void
-microcode_pointer(struct microcode_intel **mc_saved,
-		  unsigned long *mc_saved_in_initrd,
-		  unsigned long initrd_start, int mc_saved_count)
+static inline void
+copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
+		  unsigned long off, int num_saved)
 {
 	int i;
 
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved[i] = (struct microcode_intel *)
-			      (mc_saved_in_initrd[i] + initrd_start);
+	for (i = 0; i < num_saved; i++)
+		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
 }
 
 #ifdef CONFIG_X86_32
@@ -102,55 +108,27 @@
 #endif
 
 static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data,
-	       unsigned long *mc_saved_in_initrd,
-	       unsigned long initrd_start,
-	       struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long initrd_start, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
 	unsigned int count = mc_saved_data->mc_saved_count;
 
 	if (!mc_saved_data->mc_saved) {
-		microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
-				  initrd_start, count);
+		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
 
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+		return load_microcode_early(mc_saved_tmp, count, uci);
 	} else {
 #ifdef CONFIG_X86_32
 		microcode_phys(mc_saved_tmp, mc_saved_data);
-		return generic_load_microcode_early(mc_saved_tmp, count, uci);
+		return load_microcode_early(mc_saved_tmp, count, uci);
 #else
-		return generic_load_microcode_early(mc_saved_data->mc_saved,
+		return load_microcode_early(mc_saved_data->mc_saved,
 						    count, uci);
 #endif
 	}
 }
 
-static u8 get_x86_family(unsigned long sig)
-{
-	u8 x86;
-
-	x86 = (sig >> 8) & 0xf;
-
-	if (x86 == 0xf)
-		x86 += (sig >> 20) & 0xff;
-
-	return x86;
-}
-
-static u8 get_x86_model(unsigned long sig)
-{
-	u8 x86, x86_model;
-
-	x86 = get_x86_family(sig);
-	x86_model = (sig >> 4) & 0xf;
-
-	if (x86 == 0x6 || x86 == 0xf)
-		x86_model += ((sig >> 16) & 0xf) << 4;
-
-	return x86_model;
-}
-
 /*
  * Given CPU signature and a microcode patch, this function finds if the
  * microcode patch has matching family and model with the CPU.
@@ -159,42 +137,40 @@
 matching_model_microcode(struct microcode_header_intel *mc_header,
 			unsigned long sig)
 {
-	u8 x86, x86_model;
-	u8 x86_ucode, x86_model_ucode;
+	unsigned int fam, model;
+	unsigned int fam_ucode, model_ucode;
 	struct extended_sigtable *ext_header;
 	unsigned long total_size = get_totalsize(mc_header);
 	unsigned long data_size = get_datasize(mc_header);
 	int ext_sigcount, i;
 	struct extended_signature *ext_sig;
 
-	x86 = get_x86_family(sig);
-	x86_model = get_x86_model(sig);
+	fam   = __x86_family(sig);
+	model = x86_model(sig);
 
-	x86_ucode = get_x86_family(mc_header->sig);
-	x86_model_ucode = get_x86_model(mc_header->sig);
+	fam_ucode   = __x86_family(mc_header->sig);
+	model_ucode = x86_model(mc_header->sig);
 
-	if (x86 == x86_ucode && x86_model == x86_model_ucode)
+	if (fam == fam_ucode && model == model_ucode)
 		return UCODE_OK;
 
 	/* Look for ext. headers: */
 	if (total_size <= data_size + MC_HEADER_SIZE)
 		return UCODE_NFOUND;
 
-	ext_header = (struct extended_sigtable *)
-		     mc_header + data_size + MC_HEADER_SIZE;
+	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
+	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
 	ext_sigcount = ext_header->count;
-	ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
 	for (i = 0; i < ext_sigcount; i++) {
-		x86_ucode = get_x86_family(ext_sig->sig);
-		x86_model_ucode = get_x86_model(ext_sig->sig);
+		fam_ucode   = __x86_family(ext_sig->sig);
+		model_ucode = x86_model(ext_sig->sig);
 
-		if (x86 == x86_ucode && x86_model == x86_model_ucode)
+		if (fam == fam_ucode && model == model_ucode)
 			return UCODE_OK;
 
 		ext_sig++;
 	}
-
 	return UCODE_NFOUND;
 }
 
@@ -204,7 +180,7 @@
 	       unsigned int mc_saved_count)
 {
 	int i, j;
-	struct microcode_intel **mc_saved_p;
+	struct microcode_intel **saved_ptr;
 	int ret;
 
 	if (!mc_saved_count)
@@ -213,39 +189,45 @@
 	/*
 	 * Copy new microcode data.
 	 */
-	mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
-			     GFP_KERNEL);
-	if (!mc_saved_p)
+	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+	if (!saved_ptr)
 		return -ENOMEM;
 
 	for (i = 0; i < mc_saved_count; i++) {
-		struct microcode_intel *mc = mc_saved_src[i];
-		struct microcode_header_intel *mc_header = &mc->hdr;
-		unsigned long mc_size = get_totalsize(mc_header);
-		mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
-		if (!mc_saved_p[i]) {
-			ret = -ENOMEM;
-			goto err;
-		}
+		struct microcode_header_intel *mc_hdr;
+		struct microcode_intel *mc;
+		unsigned long size;
+
 		if (!mc_saved_src[i]) {
 			ret = -EINVAL;
 			goto err;
 		}
-		memcpy(mc_saved_p[i], mc, mc_size);
+
+		mc     = mc_saved_src[i];
+		mc_hdr = &mc->hdr;
+		size   = get_totalsize(mc_hdr);
+
+		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+		if (!saved_ptr[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		memcpy(saved_ptr[i], mc, size);
 	}
 
 	/*
 	 * Point to newly saved microcode.
 	 */
-	mc_saved_data->mc_saved = mc_saved_p;
+	mc_saved_data->mc_saved = saved_ptr;
 	mc_saved_data->mc_saved_count = mc_saved_count;
 
 	return 0;
 
 err:
 	for (j = 0; j <= i; j++)
-		kfree(mc_saved_p[j]);
-	kfree(mc_saved_p);
+		kfree(saved_ptr[j]);
+	kfree(saved_ptr);
 
 	return ret;
 }
@@ -257,48 +239,45 @@
  * - or if it is a newly discovered microcode patch.
  *
  * The microcode patch should have matching model with CPU.
+ *
+ * Returns: The updated number @num_saved of saved microcode patches.
  */
-static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
-		     unsigned int *mc_saved_count_p)
+static unsigned int _save_mc(struct microcode_intel **mc_saved,
+			     u8 *ucode_ptr, unsigned int num_saved)
 {
-	int i;
-	int found = 0;
-	unsigned int mc_saved_count = *mc_saved_count_p;
-	struct microcode_header_intel *mc_header;
+	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+	unsigned int sig, pf, new_rev;
+	int found = 0, i;
 
-	mc_header = (struct microcode_header_intel *)ucode_ptr;
-	for (i = 0; i < mc_saved_count; i++) {
-		unsigned int sig, pf;
-		unsigned int new_rev;
-		struct microcode_header_intel *mc_saved_header =
-			     (struct microcode_header_intel *)mc_saved[i];
-		sig = mc_saved_header->sig;
-		pf = mc_saved_header->pf;
-		new_rev = mc_header->rev;
+	mc_hdr = (struct microcode_header_intel *)ucode_ptr;
 
-		if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
-			found = 1;
-			if (update_match_revision(mc_header, new_rev)) {
-				/*
-				 * Found an older ucode saved before.
-				 * Replace the older one with this newer
-				 * one.
-				 */
-				mc_saved[i] =
-					(struct microcode_intel *)ucode_ptr;
-				break;
-			}
-		}
-	}
-	if (i >= mc_saved_count && !found)
+	for (i = 0; i < num_saved; i++) {
+		mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
+		sig	     = mc_saved_hdr->sig;
+		pf	     = mc_saved_hdr->pf;
+		new_rev	     = mc_hdr->rev;
+
+		if (!get_matching_sig(sig, pf, new_rev, ucode_ptr))
+			continue;
+
+		found = 1;
+
+		if (!revision_is_newer(mc_hdr, new_rev))
+			continue;
+
 		/*
-		 * This ucode is first time discovered in ucode file.
-		 * Save it to memory.
+		 * Found an older ucode saved earlier. Replace it with
+		 * this newer one.
 		 */
-		mc_saved[mc_saved_count++] =
-				 (struct microcode_intel *)ucode_ptr;
+		mc_saved[i] = (struct microcode_intel *)ucode_ptr;
+		break;
+	}
 
-	*mc_saved_count_p = mc_saved_count;
+	/* Newly detected microcode, save it to memory. */
+	if (i >= num_saved && !found)
+		mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
+
+	return num_saved;
 }
 
 /*
@@ -346,7 +325,7 @@
 			continue;
 		}
 
-		_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
 
 		ucode_ptr += mc_size;
 	}
@@ -372,7 +351,7 @@
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
-	u8 x86, x86_model;
+	unsigned int family, model;
 	struct cpu_signature csig;
 	unsigned int eax, ebx, ecx, edx;
 
@@ -387,10 +366,10 @@
 	native_cpuid(&eax, &ebx, &ecx, &edx);
 	csig.sig = eax;
 
-	x86 = get_x86_family(csig.sig);
-	x86_model = get_x86_model(csig.sig);
+	family = __x86_family(csig.sig);
+	model  = x86_model(csig.sig);
 
-	if ((x86_model >= 5) || (x86 > 6)) {
+	if ((model >= 5) || (family > 6)) {
 		/* get processor flags from MSR 0x17 */
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
@@ -429,8 +408,7 @@
 	sig = uci.cpu_sig.sig;
 	pf = uci.cpu_sig.pf;
 	rev = uci.cpu_sig.rev;
-	pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
-		 smp_processor_id(), sig, pf, rev);
+	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
 
 	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
 		struct microcode_header_intel *mc_saved_header;
@@ -457,8 +435,7 @@
 		if (total_size <= data_size + MC_HEADER_SIZE)
 			continue;
 
-		ext_header = (struct extended_sigtable *)
-			     mc_saved_header + data_size + MC_HEADER_SIZE;
+		ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
 		ext_sigcount = ext_header->count;
 		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
@@ -515,8 +492,7 @@
 	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
 	 * version.
 	 */
-
-	_save_mc(mc_saved_tmp, mc, &mc_saved_count);
+	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
 
 	/*
 	 * Save the mc_save_tmp in global mc_saved_data.
@@ -548,12 +524,10 @@
 
 static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
 static __init enum ucode_state
-scan_microcode(unsigned long start, unsigned long end,
-		struct mc_saved_data *mc_saved_data,
-		unsigned long *mc_saved_in_initrd,
-		struct ucode_cpu_info *uci)
+scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long start, unsigned long size,
+	       struct ucode_cpu_info *uci)
 {
-	unsigned int size = end - start + 1;
 	struct cpio_data cd;
 	long offset = 0;
 #ifdef CONFIG_X86_32
@@ -569,10 +543,8 @@
 	if (!cd.data)
 		return UCODE_ERROR;
 
-
 	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, mc_saved_in_initrd,
-					    uci);
+					    mc_saved_data, initrd, uci);
 }
 
 /*
@@ -704,7 +676,7 @@
 	if (count == 0)
 		return ret;
 
-	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
 		pr_err("Cannot save microcode patches from initrd.\n");
@@ -716,52 +688,44 @@
 
 static void __init
 _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *mc_saved_in_initrd,
-		      unsigned long initrd_start_early,
-		      unsigned long initrd_end_early,
-		      struct ucode_cpu_info *uci)
+		      unsigned long *initrd,
+		      unsigned long start, unsigned long size)
 {
+	struct ucode_cpu_info uci;
 	enum ucode_state ret;
 
-	collect_cpu_info_early(uci);
-	scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
-		       mc_saved_in_initrd, uci);
+	collect_cpu_info_early(&uci);
 
-	ret = load_microcode(mc_saved_data, mc_saved_in_initrd,
-			     initrd_start_early, uci);
+	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+	if (ret != UCODE_OK)
+		return;
 
-	if (ret == UCODE_OK)
-		apply_microcode_early(uci, true);
+	ret = load_microcode(mc_saved_data, initrd, start, &uci);
+	if (ret != UCODE_OK)
+		return;
+
+	apply_microcode_early(&uci, true);
 }
 
-void __init
-load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(void)
 {
-	u64 ramdisk_image, ramdisk_size;
-	unsigned long initrd_start_early, initrd_end_early;
-	struct ucode_cpu_info uci;
+	u64 start, size;
 #ifdef CONFIG_X86_32
-	struct boot_params *boot_params_p;
+	struct boot_params *p;
 
-	boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
-	ramdisk_image = boot_params_p->hdr.ramdisk_image;
-	ramdisk_size  = boot_params_p->hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image;
-	initrd_end_early = initrd_start_early + ramdisk_size;
+	p	= (struct boot_params *)__pa_nodebug(&boot_params);
+	start	= p->hdr.ramdisk_image;
+	size	= p->hdr.ramdisk_size;
 
 	_load_ucode_intel_bsp(
-		(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-		(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-		initrd_start_early, initrd_end_early, &uci);
+			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+			start, size);
 #else
-	ramdisk_image = boot_params.hdr.ramdisk_image;
-	ramdisk_size  = boot_params.hdr.ramdisk_size;
-	initrd_start_early = ramdisk_image + PAGE_OFFSET;
-	initrd_end_early = initrd_start_early + ramdisk_size;
+	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
+	size	= boot_params.hdr.ramdisk_size;
 
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
-			      initrd_start_early, initrd_end_early,
-			      &uci);
+	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
 #endif
 }
 
@@ -771,6 +735,7 @@
 	struct ucode_cpu_info uci;
 	unsigned long *mc_saved_in_initrd_p;
 	unsigned long initrd_start_addr;
+	enum ucode_state ret;
 #ifdef CONFIG_X86_32
 	unsigned long *initrd_start_p;
 
@@ -793,8 +758,12 @@
 		return;
 
 	collect_cpu_info_early(&uci);
-	load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-		       initrd_start_addr, &uci);
+	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+			     initrd_start_addr, &uci);
+
+	if (ret != UCODE_OK)
+		return;
+
 	apply_microcode_early(&uci, true);
 }
 
@@ -808,8 +777,8 @@
 
 	collect_cpu_info_early(&uci);
 
-	ret = generic_load_microcode_early(mc_saved_data.mc_saved,
-					   mc_saved_data.mc_saved_count, &uci);
+	ret = load_microcode_early(mc_saved_data.mc_saved,
+				   mc_saved_data.mc_saved_count, &uci);
 	if (ret != UCODE_OK)
 		return;
 
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index ce69320..cd47a51 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -38,12 +38,6 @@
 	return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
 }
 
-int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-	return (mc_header->rev <= rev) ? 0 : 1;
-}
-
 int microcode_sanity_check(void *mc, int print_err)
 {
 	unsigned long total_size, data_size, ext_table_size;
@@ -128,10 +122,9 @@
 EXPORT_SYMBOL_GPL(microcode_sanity_check);
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if update has been found, 0 otherwise.
  */
-int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc)
 {
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header;
@@ -159,16 +152,15 @@
 }
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if update has been found, 0 otherwise.
  */
-int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc)
 {
-	struct microcode_header_intel *mc_header = mc;
+	struct microcode_header_intel *mc_hdr = mc;
 
-	if (!update_match_revision(mc_header, rev))
+	if (!revision_is_newer(mc_hdr, rev))
 		return 0;
 
-	return get_matching_sig(csig, cpf, mc, rev);
+	return get_matching_sig(csig, cpf, rev, mc);
 }
 EXPORT_SYMBOL_GPL(get_matching_microcode);
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 36d99a3..3f20710 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -6,7 +6,7 @@
 IN=$1
 OUT=$2
 
-function dump_array()
+dump_array()
 {
 	ARRAY=$1
 	SIZE=$2
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b71a7f8..e2888a3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2147,24 +2147,24 @@
 static unsigned long code_segment_base(struct pt_regs *regs)
 {
 	/*
+	 * For IA32 we look at the GDT/LDT segment base to convert the
+	 * effective IP to a linear address.
+	 */
+
+#ifdef CONFIG_X86_32
+	/*
 	 * If we are in VM86 mode, add the segment offset to convert to a
 	 * linear address.
 	 */
 	if (regs->flags & X86_VM_MASK)
 		return 0x10 * regs->cs;
 
-	/*
-	 * For IA32 we look at the GDT/LDT segment base to convert the
-	 * effective IP to a linear address.
-	 */
-#ifdef CONFIG_X86_32
 	if (user_mode(regs) && regs->cs != __USER_CS)
 		return get_segment_base(regs->cs);
 #else
-	if (test_thread_flag(TIF_IA32)) {
-		if (user_mode(regs) && regs->cs != __USER32_CS)
-			return get_segment_base(regs->cs);
-	}
+	if (user_mode(regs) && !user_64bit_mode(regs) &&
+	    regs->cs != __USER32_CS)
+		return get_segment_base(regs->cs);
 #endif
 	return 0;
 }
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index aceb2f9..c76d3e3 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -105,7 +105,7 @@
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3d35033..6367a78 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -286,13 +286,13 @@
 	initial_boot_params = dt = early_memremap(initial_dtb, map_len);
 	size = of_get_flat_dt_size();
 	if (map_len < size) {
-		early_iounmap(dt, map_len);
+		early_memunmap(dt, map_len);
 		initial_boot_params = dt = early_memremap(initial_dtb, size);
 		map_len = size;
 	}
 
 	unflatten_and_copy_device_tree();
-	early_iounmap(dt, map_len);
+	early_memunmap(dt, map_len);
 }
 #else
 static inline void x86_flattree_get_config(void) { }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index cf3df1d..9c30acf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -25,10 +25,12 @@
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
-static void printk_stack_address(unsigned long address, int reliable)
+static void printk_stack_address(unsigned long address, int reliable,
+		void *data)
 {
-	pr_cont(" [<%p>] %s%pB\n",
-		(void *)address, reliable ? "" : "? ", (void *)address);
+	printk("%s [<%p>] %s%pB\n",
+		(char *)data, (void *)address, reliable ? "" : "? ",
+		(void *)address);
 }
 
 void printk_address(unsigned long address)
@@ -155,8 +157,7 @@
 static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
 	touch_nmi_watchdog();
-	printk(data);
-	printk_stack_address(addr, reliable);
+	printk_stack_address(addr, reliable, data);
 }
 
 static const struct stacktrace_ops print_trace_ops = {
@@ -278,7 +279,7 @@
 	print_modules();
 	show_regs(regs);
 #ifdef CONFIG_X86_32
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 	} else {
@@ -307,7 +308,7 @@
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 		report_bug(regs->ip, regs);
 
 	if (__die(str, regs, err))
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 5abd4cd..464ffd6 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -108,9 +108,12 @@
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
-		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			pr_cont("\n");
-		pr_cont(" %08lx", *stack++);
+		if ((i % STACKSLOTS_PER_LINE) == 0) {
+			if (i != 0)
+				pr_cont("\n");
+			printk("%s %08lx", log_lvl, *stack++);
+		} else
+			pr_cont(" %08lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	pr_cont("\n");
@@ -123,13 +126,13 @@
 	int i;
 
 	show_regs_print_info(KERN_EMERG);
-	__show_regs(regs, !user_mode_vm(regs));
+	__show_regs(regs, !user_mode(regs));
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index ff86f19..5f1c626 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -280,12 +280,15 @@
 				pr_cont(" <EOI> ");
 			}
 		} else {
-		if (((long) stack & (THREAD_SIZE-1)) == 0)
+		if (kstack_end(stack))
 			break;
 		}
-		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			pr_cont("\n");
-		pr_cont(" %016lx", *stack++);
+		if ((i % STACKSLOTS_PER_LINE) == 0) {
+			if (i != 0)
+				pr_cont("\n");
+			printk("%s %016lx", log_lvl, *stack++);
+		} else
+			pr_cont(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 46201de..7d46bb2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -661,7 +661,7 @@
 	extmap = (struct e820entry *)(sdata->data);
 	__append_e820_map(extmap, entries);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-	early_iounmap(sdata, data_len);
+	early_memunmap(sdata, data_len);
 	printk(KERN_INFO "e820: extended physical RAM map:\n");
 	e820_print_map("extended");
 }
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index a62536a..49ff55e 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -95,20 +95,6 @@
 #define DLL             0       /*  Divisor Latch Low         */
 #define DLH             1       /*  Divisor latch High        */
 
-static void mem32_serial_out(unsigned long addr, int offset, int value)
-{
-	uint32_t *vaddr = (uint32_t *)addr;
-	/* shift implied by pointer type */
-	writel(value, vaddr + offset);
-}
-
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
-{
-	uint32_t *vaddr = (uint32_t *)addr;
-	/* shift implied by pointer type */
-	return readl(vaddr + offset);
-}
-
 static unsigned int io_serial_in(unsigned long addr, int offset)
 {
 	return inb(addr + offset);
@@ -205,6 +191,20 @@
 }
 
 #ifdef CONFIG_PCI
+static void mem32_serial_out(unsigned long addr, int offset, int value)
+{
+	u32 *vaddr = (u32 *)addr;
+	/* shift implied by pointer type */
+	writel(value, vaddr + offset);
+}
+
+static unsigned int mem32_serial_in(unsigned long addr, int offset)
+{
+	u32 *vaddr = (u32 *)addr;
+	/* shift implied by pointer type */
+	return readl(vaddr + offset);
+}
+
 /*
  * early_pci_serial_init()
  *
@@ -217,8 +217,8 @@
 	unsigned divisor;
 	unsigned long baud = DEFAULT_BAUD;
 	u8 bus, slot, func;
-	uint32_t classcode, bar0;
-	uint16_t cmdreg;
+	u32 classcode, bar0;
+	u16 cmdreg;
 	char *e;
 
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 31e2d5b..1c30976 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,10 +395,13 @@
 	/*CFI_REL_OFFSET cs, 0*/
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
-	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
-	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
+	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
+	 * is relative to thread_info, which is at the bottom of the
+	 * kernel stack page.  4*4 means the 4 words pushed above;
+	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+	 * and THREAD_SIZE takes us to the bottom.
 	 */
-	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+	pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
 	CFI_REL_OFFSET eip, 0
 
 	pushl_cfi %eax
@@ -432,7 +435,7 @@
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $_TIF_ALLWORK_MASK, %ecx
-	jne sysexit_audit
+	jnz sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
 	movl PT_EIP(%esp), %edx
@@ -460,7 +463,7 @@
 
 sysexit_audit:
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jne syscall_exit_work
+	jnz syscall_exit_work
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
@@ -472,7 +475,7 @@
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jne syscall_exit_work
+	jnz syscall_exit_work
 	movl PT_EAX(%esp),%eax	/* reload syscall return value */
 	jmp sysenter_exit
 #endif
@@ -510,7 +513,7 @@
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
-	jne syscall_exit_work
+	jnz syscall_exit_work
 
 restore_all:
 	TRACE_IRQS_IRET
@@ -612,7 +615,7 @@
 #ifdef CONFIG_VM86
 	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 	movl %esp, %eax
-	jne work_notifysig_v86		# returning to kernel-space or
+	jnz work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
 1:
 #else
@@ -720,43 +723,22 @@
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -816,15 +798,9 @@
 	pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
 	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:	pushl_cfi $do_general_protection
-662:
-.section .altinstructions,"a"
-	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
-.previous
-.section .altinstr_replacement,"ax"
-663:	pushl $do_simd_coprocessor_error
-664:
-.previous
+	ALTERNATIVE "pushl_cfi $do_general_protection",	\
+		    "pushl $do_simd_coprocessor_error", \
+		    X86_FEATURE_XMM
 #else
 	pushl_cfi $do_simd_coprocessor_error
 #endif
@@ -1240,20 +1216,13 @@
 	/*CFI_REL_OFFSET es, 0*/
 	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0*/
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	cld
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f0095a7..c7b2384 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,27 +14,14 @@
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  * - idtentry - Define exception entry points.
  */
@@ -70,10 +57,6 @@
 	.section .entry.text, "ax"
 
 
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
 	swapgs
@@ -82,9 +65,9 @@
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON
 1:
@@ -116,8 +99,8 @@
 	call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON_DEBUG
 1:
@@ -130,34 +113,7 @@
 #endif
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
-	/* %rsp:at FRAMEEND */
-	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq PER_CPU_VAR(old_rsp),\tmp
-	movq \tmp,RSP+\offset(%rsp)
-	movq $__USER_DS,SS+\offset(%rsp)
-	movq $__USER_CS,CS+\offset(%rsp)
-	movq RIP+\offset(%rsp),\tmp  /* get rip */
-	movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-	movq R11+\offset(%rsp),\tmp  /* get eflags */
-	movq \tmp,EFLAGS+\offset(%rsp)
-	.endm
-
-	.macro RESTORE_TOP_OF_STACK tmp offset=0
-	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,PER_CPU_VAR(old_rsp)
-	movq EFLAGS+\offset(%rsp),\tmp
-	movq \tmp,R11+\offset(%rsp)
-	.endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
 	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
@@ -173,12 +129,12 @@
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, SS+8+\offset-RIP
-	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-	CFI_REL_OFFSET rsp, RSP+\offset-RIP
-	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-	CFI_REL_OFFSET rip, RIP+\offset-RIP
+	EMPTY_FRAME \start, 5*8+\offset
+	/*CFI_REL_OFFSET ss, 4*8+\offset*/
+	CFI_REL_OFFSET rsp, 3*8+\offset
+	/*CFI_REL_OFFSET rflags, 2*8+\offset*/
+	/*CFI_REL_OFFSET cs, 1*8+\offset*/
+	CFI_REL_OFFSET rip, 0*8+\offset
 	.endm
 
 /*
@@ -186,30 +142,23 @@
  * with vector already pushed)
  */
 	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, RIP+\offset-ORIG_RAX
-	.endm
-
-/*
- * frame that enables calling into C.
- */
-	.macro PARTIAL_FRAME start=1 offset=0
-	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+	INTR_FRAME \start, 1*8+\offset
 	.endm
 
 /*
  * frame that enables passing a complete pt_regs to a C function.
  */
 	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
+	XCPT_FRAME \start, ORIG_RAX+\offset
+	CFI_REL_OFFSET rdi, RDI+\offset
+	CFI_REL_OFFSET rsi, RSI+\offset
+	CFI_REL_OFFSET rdx, RDX+\offset
+	CFI_REL_OFFSET rcx, RCX+\offset
+	CFI_REL_OFFSET rax, RAX+\offset
+	CFI_REL_OFFSET r8, R8+\offset
+	CFI_REL_OFFSET r9, R9+\offset
+	CFI_REL_OFFSET r10, R10+\offset
+	CFI_REL_OFFSET r11, R11+\offset
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -218,105 +167,30 @@
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
-	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
-	movl $1,%ebx
-	movl $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js 1f	/* negative -> in kernel */
-	SWAPGS
-	xorl %ebx,%ebx
-1:	ret
-	CFI_ENDPROC
-END(save_paranoid)
-
 /*
- * A newly forked process directly context switches into this address.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-	DEFAULT_FRAME
-
-	LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-	pushq_cfi $0x0002
-	popfq_cfi				# reset kernel eflags
-
-	call schedule_tail			# rdi: 'prev' task parameter
-
-	GET_THREAD_INFO(%rcx)
-
-	RESTORE_REST
-
-	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   1f
-
-	/*
-	 * By the time we get here, we have no idea whether our pt_regs,
-	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-	 * the slow path, or one of the ia32entry paths.
-	 * Use int_ret_from_sys_call to return, since it can safely handle
-	 * all of the above.
-	 */
-	jmp  int_ret_from_sys_call
-
-1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq %rbp, %rdi
-	call *%rbx
-	movl $0, RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3 	(--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
- * Interrupts are off on entry.
  * Only called from user space.
  *
- * XXX	if we had a free scratch register we could save the RSP into the stack frame
- *      and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -324,9 +198,15 @@
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
+
+	/*
+	 * Interrupts are off on entry.
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	SWAPGS_UNSAFE_STACK
 	/*
 	 * A hypervisor implementation might want to use a label
@@ -335,18 +215,38 @@
 	 */
 GLOBAL(system_call_after_swapgs)
 
-	movq	%rsp,PER_CPU_VAR(old_rsp)
+	movq	%rsp,PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
+
+	/* Construct struct pt_regs on stack */
+	pushq_cfi $__USER_DS			/* pt_regs->ss */
+	pushq_cfi PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
 	/*
-	 * No need to follow this irqs off/on section - it's straight
-	 * and short:
+	 * Re-enable interrupts.
+	 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
+	 * must execute atomically in the face of possible interrupt-driven
+	 * task preemption. We must enable interrupts only after we're done
+	 * with using rsp_scratch:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
-	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	pushq_cfi	%r11			/* pt_regs->flags */
+	pushq_cfi	$__USER_CS		/* pt_regs->cs */
+	pushq_cfi	%rcx			/* pt_regs->ip */
+	CFI_REL_OFFSET rip,0
+	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
+	pushq_cfi_reg	rdi			/* pt_regs->di */
+	pushq_cfi_reg	rsi			/* pt_regs->si */
+	pushq_cfi_reg	rdx			/* pt_regs->dx */
+	pushq_cfi_reg	rcx			/* pt_regs->cx */
+	pushq_cfi	$-ENOSYS		/* pt_regs->ax */
+	pushq_cfi_reg	r8			/* pt_regs->r8 */
+	pushq_cfi_reg	r9			/* pt_regs->r9 */
+	pushq_cfi_reg	r10			/* pt_regs->r10 */
+	pushq_cfi_reg	r11			/* pt_regs->r11 */
+	sub	$(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
+	CFI_ADJUST_CFA_OFFSET 6*8
+
+	testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -355,18 +255,21 @@
 	andl $__SYSCALL_MASK,%eax
 	cmpl $__NR_syscall_max,%eax
 #endif
-	ja ret_from_sys_call  /* and return regs->ax */
+	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
 	movq %r10,%rcx
-	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	call *sys_call_table(,%rax,8)
+	movq %rax,RAX(%rsp)
+1:
 /*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
+ * Syscall return path ending with SYSRET (fast path).
+ * Has incompletely filled pt_regs.
  */
-ret_from_sys_call:
 	LOCKDEP_SYS_EXIT
+	/*
+	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+	 * it is too small to ever cause noticeable irq latency.
+	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
 
 	/*
 	 * We must check ti flags with interrupts (or at least preemption)
@@ -376,72 +279,73 @@
 	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
 	 * very bad.
 	 */
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
+	testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	jnz int_ret_from_sys_call_irqs_off	/* Go to the slow path */
 
 	CFI_REMEMBER_STATE
-	/*
-	 * sysretq will re-enable interrupts:
-	 */
-	TRACE_IRQS_ON
-	movq RIP-ARGOFFSET(%rsp),%rcx
+
+	RESTORE_C_REGS_EXCEPT_RCX_R11
+	movq	RIP(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
+	movq	EFLAGS(%rsp),%r11
 	/*CFI_REGISTER	rflags,r11*/
-	movq	PER_CPU_VAR(old_rsp), %rsp
+	movq	RSP(%rsp),%rsp
+	/*
+	 * 64bit SYSRET restores rip from rcx,
+	 * rflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * Restoration of rflags re-enables interrupts.
+	 */
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
 
-int_ret_from_sys_call_fixup:
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_ret_from_sys_call_irqs_off
-
-	/* Do syscall tracing */
+	/* Do syscall entry tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
-	movq $AUDIT_ARCH_X86_64, %rsi
+	movq %rsp, %rdi
+	movl $AUDIT_ARCH_X86_64, %esi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS_EXCEPT_RAX	/* else restore clobbered regs */
+	movq ORIG_RAX(%rsp), %rax
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %rdi
+	SAVE_EXTRA_REGS
 	movq %rsp, %rdi
-	movq $AUDIT_ARCH_X86_64, %rsi
+	movl $AUDIT_ARCH_X86_64, %esi
 	movq %rax,%rdx
 	call syscall_trace_enter_phase2
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
 	andl $__SYSCALL_MASK,%eax
 	cmpl $__NR_syscall_max,%eax
 #endif
-	ja   int_ret_from_sys_call	/* RAX(%rsp) is already set */
+	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
-	/* Use IRET because user could have changed frame */
+	movq %rax,RAX(%rsp)
+1:
+	/* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
+int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
 	TRACE_IRQS_OFF
-int_ret_from_sys_call_irqs_off:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 GLOBAL(int_with_check)
@@ -450,8 +354,8 @@
 	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	jnz   int_careful
-	andl    $~TS_COMPAT,TI_status(%rcx)
-	jmp   retint_swapgs
+	andl	$~TS_COMPAT,TI_status(%rcx)
+	jmp	syscall_return
 
 	/* Either reschedule or signal or syscall exit tracking needed. */
 	/* First do a reschedule test. */
@@ -468,12 +372,11 @@
 	TRACE_IRQS_OFF
 	jmp int_with_check
 
-	/* handle signals and tracing -- both require a full stack frame */
+	/* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -492,277 +395,13 @@
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
-	CFI_ENDPROC
-END(system_call)
 
-	.macro FORK_LIKE func
-ENTRY(stub_\func)
-	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
-	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
-	call sys_\func
-	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
-	CFI_ENDPROC
-END(stub_\func)
-	.endm
-
-	.macro FIXED_FRAME label,func
-ENTRY(\label)
-	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
-	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
-	call \func
-	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
-	ret
-	CFI_ENDPROC
-END(\label)
-	.endm
-
-	FORK_LIKE  clone
-	FORK_LIKE  fork
-	FORK_LIKE  vfork
-	FIXED_FRAME stub_iopl, sys_iopl
-
-ENTRY(stub_execve)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys_execve
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_execve)
-
-ENTRY(stub_execveat)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys_execveat
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_execveat)
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys_rt_sigreturn
-	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_rt_sigreturn)
-
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call sys32_x32_rt_sigreturn
-	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_x32_rt_sigreturn)
-
-ENTRY(stub_x32_execve)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call compat_sys_execve
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_x32_execve)
-
-ENTRY(stub_x32_execveat)
-	CFI_STARTPROC
-	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call compat_sys_execveat
-	RESTORE_TOP_OF_STACK %r11
-	movq %rax,RAX(%rsp)
-	RESTORE_REST
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_x32_execveat)
-
-#endif
-
-/*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
- */
-	.section .init.rodata,"a"
-ENTRY(interrupt)
-	.section .entry.text
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
-ENTRY(irq_entries_start)
-	INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
-	CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
-	CFI_ENDPROC
-END(irq_entries_start)
-
-.previous
-END(interrupt)
-.previous
-
-/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
- *
- * Entry runs with interrupts off.
- */
-
-/* 0(%rsp): ~(interrupt number) */
-	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
-
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
-	je 1f
-	SWAPGS
-	/*
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-1:	incl PER_CPU_VAR(irq_count)
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
-	pushq %rsi
-	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
-			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
-			0x22 /* DW_OP_plus */
-	/* We entered an interrupt context - irqs are off: */
-	TRACE_IRQS_OFF
-
-	call \func
-	.endm
-
-	/*
-	 * The interrupt stubs push (~vector+0x80) onto the stack and
-	 * then jump to common_interrupt.
-	 */
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
-	XCPT_FRAME
-	ASM_CLAC
-	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
-	interrupt do_IRQ
-	/* 0(%rsp): old_rsp-ARGOFFSET */
-ret_from_intr:
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	decl PER_CPU_VAR(irq_count)
-
-	/* Restore saved previous stack */
-	popq %rsi
-	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
-	leaq ARGOFFSET-RBP(%rsi), %rsp
-	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
-
-exit_intr:
-	GET_THREAD_INFO(%rcx)
-	testl $3,CS-ARGOFFSET(%rsp)
-	je retint_kernel
-
-	/* Interrupt came from user space */
-	/*
-	 * Has a correct top of stack, but a partial stack frame
-	 * %rcx: thread info. Interrupts off.
-	 */
-retint_with_reschedule:
-	movl $_TIF_WORK_MASK,%edi
-retint_check:
-	LOCKDEP_SYS_EXIT_IRQ
-	movl TI_flags(%rcx),%edx
-	andl %edi,%edx
-	CFI_REMEMBER_STATE
-	jnz  retint_careful
-
-retint_swapgs:		/* return to user-space */
-	/*
-	 * The iretq could re-enable interrupts:
-	 */
+syscall_return:
+	/* The IRETQ could re-enable interrupts: */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
 
@@ -770,12 +409,12 @@
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.
 	 */
-	movq (RCX-R11)(%rsp), %rcx
-	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	movq RCX(%rsp),%rcx
+	cmpq %rcx,RIP(%rsp)		/* RCX == RIP */
 	jne opportunistic_sysret_failed
 
 	/*
-	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
+	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
 	 * in kernel space.  This essentially lets the user take over
 	 * the kernel, since userspace controls RSP.  It's not worth
 	 * testing for canonicalness exactly -- this check detects any
@@ -787,16 +426,16 @@
 	 * to be updated to remain correct on both old and new CPUs.
 	 */
 	.ifne __VIRTUAL_MASK_SHIFT - 47
-	.error "virtual address width changed -- sysret checks need update"
+	.error "virtual address width changed -- SYSRET checks need update"
 	.endif
 	shr $__VIRTUAL_MASK_SHIFT, %rcx
 	jnz opportunistic_sysret_failed
 
-	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
 	jne opportunistic_sysret_failed
 
-	movq (R11-ARGOFFSET)(%rsp), %r11
-	cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)	/* R11 == RFLAGS */
+	movq R11(%rsp),%r11
+	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -818,32 +457,333 @@
 
 	/* nothing to check for RSP */
 
-	cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)	/* SS must match SYSRET */
+	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
 	jne opportunistic_sysret_failed
 
 	/*
 	 * We win!  This label is here just for ease of understanding
 	 * perf profiles.  Nothing jumps here.
 	 */
-irq_return_via_sysret:
+syscall_return_via_sysret:
 	CFI_REMEMBER_STATE
-	RESTORE_ARGS 1,8,1
-	movq (RSP-RIP)(%rsp),%rsp
+	/* r11 is already restored (see code above) */
+	RESTORE_C_REGS_EXCEPT_R11
+	movq RSP(%rsp),%rsp
 	USERGS_SYSRET64
 	CFI_RESTORE_STATE
 
 opportunistic_sysret_failed:
 	SWAPGS
-	jmp restore_args
+	jmp	restore_c_regs_and_iret
+	CFI_ENDPROC
+END(system_call)
 
-retint_restore_args:	/* return to kernel space */
+
+	.macro FORK_LIKE func
+ENTRY(stub_\func)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
+	jmp sys_\func
+	CFI_ENDPROC
+END(stub_\func)
+	.endm
+
+	FORK_LIKE  clone
+	FORK_LIKE  fork
+	FORK_LIKE  vfork
+
+ENTRY(stub_execve)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	sys_execve
+return_from_execve:
+	testl	%eax, %eax
+	jz	1f
+	/* exec failed, can use fast SYSRET code path in this case */
+	ret
+1:
+	/* must use IRET code path (pt_regs->cs may have changed) */
+	addq	$8, %rsp
+	CFI_ADJUST_CFA_OFFSET -8
+	ZERO_EXTRA_REGS
+	movq	%rax,RAX(%rsp)
+	jmp	int_ret_from_sys_call
+	CFI_ENDPROC
+END(stub_execve)
+/*
+ * Remaining execve stubs are only 7 bytes long.
+ * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
+ */
+	.align	8
+GLOBAL(stub_execveat)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	sys_execveat
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub_execveat)
+
+#ifdef CONFIG_X86_X32_ABI
+	.align	8
+GLOBAL(stub_x32_execve)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	compat_sys_execve
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub_x32_execve)
+	.align	8
+GLOBAL(stub_x32_execveat)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	call	compat_sys_execveat
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub_x32_execveat)
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
+	.align	8
+GLOBAL(stub32_execve)
+	CFI_STARTPROC
+	call	compat_sys_execve
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub32_execve)
+	.align	8
+GLOBAL(stub32_execveat)
+	CFI_STARTPROC
+	call	compat_sys_execveat
+	jmp	return_from_execve
+	CFI_ENDPROC
+END(stub32_execveat)
+#endif
+
+/*
+ * sigreturn is special because it needs to restore all registers on return.
+ * This cannot be done with SYSRET, so use the IRET return path instead.
+ */
+ENTRY(stub_rt_sigreturn)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	/*
+	 * SAVE_EXTRA_REGS result is not normally needed:
+	 * sigreturn overwrites all pt_regs->GPREGS.
+	 * But sigreturn can fail (!), and there is no easy way to detect that.
+	 * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
+	 * we SAVE_EXTRA_REGS here.
+	 */
+	SAVE_EXTRA_REGS 8
+	call sys_rt_sigreturn
+return_from_stub:
+	addq	$8, %rsp
+	CFI_ADJUST_CFA_OFFSET -8
+	RESTORE_EXTRA_REGS
+	movq %rax,RAX(%rsp)
+	jmp int_ret_from_sys_call
+	CFI_ENDPROC
+END(stub_rt_sigreturn)
+
+#ifdef CONFIG_X86_X32_ABI
+ENTRY(stub_x32_rt_sigreturn)
+	CFI_STARTPROC
+	DEFAULT_FRAME 0, 8
+	SAVE_EXTRA_REGS 8
+	call sys32_x32_rt_sigreturn
+	jmp  return_from_stub
+	CFI_ENDPROC
+END(stub_x32_rt_sigreturn)
+#endif
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+	DEFAULT_FRAME
+
+	LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+	pushq_cfi $0x0002
+	popfq_cfi				# reset kernel eflags
+
+	call schedule_tail			# rdi: 'prev' task parameter
+
+	RESTORE_EXTRA_REGS
+
+	testl $3,CS(%rsp)			# from kernel_thread?
+
+	/*
+	 * By the time we get here, we have no idea whether our pt_regs,
+	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+	 * the slow path, or one of the ia32entry paths.
+	 * Use IRET code path to return, since it can safely handle
+	 * all of the above.
+	 */
+	jnz	int_ret_from_sys_call
+
+	/* We came from kernel_thread */
+	/* nb: we depend on RESTORE_EXTRA_REGS above */
+	movq %rbp, %rdi
+	call *%rbx
+	movl $0, RAX(%rsp)
+	RESTORE_EXTRA_REGS
+	jmp int_ret_from_sys_call
+	CFI_ENDPROC
+END(ret_from_fork)
+
+/*
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
+ */
+	.align 8
+ENTRY(irq_entries_start)
+	INTR_FRAME
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
+	CFI_ADJUST_CFA_OFFSET -8
+	.align	8
+    .endr
+	CFI_ENDPROC
+END(irq_entries_start)
+
+/*
+ * Interrupt entry/exit.
+ *
+ * Interrupt entry points save only callee clobbered registers in fast path.
+ *
+ * Entry runs with interrupts off.
+ */
+
+/* 0(%rsp): ~(interrupt number) */
+	.macro interrupt func
+	cld
+	/*
+	 * Since nothing in interrupt handling code touches r12...r15 members
+	 * of "struct pt_regs", and since interrupts can nest, we can save
+	 * four stack slots and simultaneously provide
+	 * an unwind-friendly stack layout by saving "truncated" pt_regs
+	 * exactly up to rbp slot, without these members.
+	 */
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
+
+	testl $3, CS-RBP(%rsp)
+	je 1f
+	SWAPGS
+1:
+	/*
+	 * Save previous stack pointer, optionally switch to interrupt stack.
+	 * irq_count is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
+	CFI_DEF_CFA_REGISTER	rsi
+	pushq %rsi
+	/*
+	 * For debugger:
+	 * "CFA (Current Frame Address) is the value on stack + offset"
+	 */
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 (rsp) */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
+			0x22 /* DW_OP_plus */
+	/* We entered an interrupt context - irqs are off: */
+	TRACE_IRQS_OFF
+
+	call \func
+	.endm
+
+	/*
+	 * The interrupt stubs push (~vector+0x80) onto the stack and
+	 * then jump to common_interrupt.
+	 */
+	.p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
+	XCPT_FRAME
+	ASM_CLAC
+	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
+	interrupt do_IRQ
+	/* 0(%rsp): old RSP */
+ret_from_intr:
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	decl PER_CPU_VAR(irq_count)
+
+	/* Restore saved previous stack */
+	popq %rsi
+	CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+	/* return code expects complete pt_regs - adjust rsp accordingly: */
+	leaq -RBP(%rsi),%rsp
+	CFI_DEF_CFA_REGISTER	rsp
+	CFI_ADJUST_CFA_OFFSET	RBP
+
+	testl $3,CS(%rsp)
+	je retint_kernel
+	/* Interrupt came from user space */
+
+	GET_THREAD_INFO(%rcx)
+	/*
+	 * %rcx: thread info. Interrupts off.
+	 */
+retint_with_reschedule:
+	movl $_TIF_WORK_MASK,%edi
+retint_check:
+	LOCKDEP_SYS_EXIT_IRQ
+	movl TI_flags(%rcx),%edx
+	andl %edi,%edx
+	CFI_REMEMBER_STATE
+	jnz  retint_careful
+
+retint_swapgs:		/* return to user-space */
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
 	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_IRETQ
+
+	SWAPGS
+	jmp	restore_c_regs_and_iret
+
+/* Returning to kernel space */
+retint_kernel:
+#ifdef CONFIG_PREEMPT
+	/* Interrupts are off */
+	/* Check if we need preemption */
+	bt	$9,EFLAGS(%rsp)	/* interrupts were off? */
+	jnc	1f
+0:	cmpl	$0,PER_CPU_VAR(__preempt_count)
+	jnz	1f
+	call	preempt_schedule_irq
+	jmp	0b
+1:
+#endif
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
 	TRACE_IRQS_IRETQ
-restore_args:
-	RESTORE_ARGS 1,8,1
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -914,28 +854,17 @@
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	jmp retint_with_reschedule
 
-#ifdef CONFIG_PREEMPT
-	/* Returning to kernel space. Check if we need preemption */
-	/* rcx:	 threadinfo. interrupts off. */
-ENTRY(retint_kernel)
-	cmpl $0,PER_CPU_VAR(__preempt_count)
-	jnz  retint_restore_args
-	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
-	jnc  retint_restore_args
-	call preempt_schedule_irq
-	jmp exit_intr
-#endif
 	CFI_ENDPROC
 END(common_interrupt)
 
@@ -1024,7 +953,7 @@
 /*
  * Exception entry points.
  */
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -1046,8 +975,7 @@
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1055,10 +983,11 @@
 	testl $3, CS(%rsp)		/* If coming from userspace, switch */
 	jnz 1f				/* stacks. */
 	.endif
-	call save_paranoid
+	call paranoid_entry
 	.else
 	call error_entry
 	.endif
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	DEFAULT_FRAME 0
 
@@ -1080,19 +1009,20 @@
 	.endif
 
 	.if \shift_ist != -1
-	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
 	call \do_sym
 
 	.if \shift_ist != -1
-	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
+	/* these procedures expect "no swapgs" flag in ebx */
 	.if \paranoid
-	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	jmp paranoid_exit
 	.else
-	jmp error_exit			/* %ebx: no swapgs flag */
+	jmp error_exit
 	.endif
 
 	.if \paranoid == 1
@@ -1296,7 +1226,9 @@
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1328,59 +1260,66 @@
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-	/*
-	 * "Paranoid" exit path from exception stack.  This is invoked
-	 * only on return from non-NMI IST interrupts that came
-	 * from kernel space.
-	 *
-	 * We may be returning to very strange contexts (e.g. very early
-	 * in syscall entry), so checking for preemption here would
-	 * be complicated.  Fortunately, we there's no good reason
-	 * to try to handle preemption here.
-	 */
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+	XCPT_FRAME 1 15*8
+	cld
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	ret
+	CFI_ENDPROC
+END(paranoid_entry)
 
-	/* ebx:	no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	TRACE_IRQS_IRETQ 0
+	jnz paranoid_exit_no_swapgs
+	TRACE_IRQS_IRETQ
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
-	INTERRUPT_RETURN
-paranoid_restore:
-	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
+	TRACE_IRQS_IRETQ_DEBUG
+paranoid_exit_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-	XCPT_FRAME
-	CFI_ADJUST_CFA_OFFSET 15*8
-	/* oldrax contains error code */
+	XCPT_FRAME 1 15*8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1390,12 +1329,12 @@
 	TRACE_IRQS_OFF
 	ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+	/*
+	 * There are two places in the kernel that can potentially fault with
+	 * usergs. Handle them here.  B stepping K8s sometimes report a
+	 * truncated RIP for IRET exceptions returning to compat mode. Check
+	 * for these here too.
+	 */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
@@ -1425,11 +1364,11 @@
 END(error_entry)
 
 
-/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1444,19 +1383,7 @@
 	CFI_ENDPROC
 END(error_exit)
 
-/*
- * Test if a given stack is an NMI stack or not.
- */
-	.macro test_in_nmi reg stack nmi_ret normal_ret
-	cmpq %\reg, \stack
-	ja \normal_ret
-	subq $EXCEPTION_STKSZ, %\reg
-	cmpq %\reg, \stack
-	jb \normal_ret
-	jmp \nmi_ret
-	.endm
-
-	/* runs on exception stack */
+/* Runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1492,7 +1419,7 @@
 	 * NMI.
 	 */
 
-	/* Use %rdx as out temp variable throughout */
+	/* Use %rdx as our temp variable throughout */
 	pushq_cfi %rdx
 	CFI_REL_OFFSET rdx, 0
 
@@ -1517,8 +1444,17 @@
 	 * We check the variable because the first NMI could be in a
 	 * breakpoint routine using a breakpoint stack.
 	 */
-	lea 6*8(%rsp), %rdx
-	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+	lea	6*8(%rsp), %rdx
+	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
+	cmpq	%rdx, 4*8(%rsp)
+	/* If the stack pointer is above the NMI stack, this is a normal NMI */
+	ja	first_nmi
+	subq	$EXCEPTION_STKSZ, %rdx
+	cmpq	%rdx, 4*8(%rsp)
+	/* If it is below the NMI stack, it is a normal NMI */
+	jb	first_nmi
+	/* Ah, it is within the NMI stack, treat it as nested */
+
 	CFI_REMEMBER_STATE
 
 nested_nmi:
@@ -1611,7 +1547,7 @@
 	.rept 5
 	pushq_cfi 11*8(%rsp)
 	.endr
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1639,7 +1575,7 @@
 	pushq_cfi -6*8(%rsp)
 	.endr
 	subq $(5*8), %rsp
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
 	/*
@@ -1648,16 +1584,16 @@
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
-	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
 	 * Even with normal interrupts enabled. An NMI should not be
 	 * setting NEED_RESCHED or anything that normal interrupts and
 	 * exceptions might do.
 	 */
-	call save_paranoid
+	call paranoid_entry
 	DEFAULT_FRAME 0
 
 	/*
@@ -1688,8 +1624,10 @@
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c4f8d46..2b55ee6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -177,9 +177,6 @@
 	 */
 	load_ucode_bsp();
 
-	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-		early_printk("Kernel alive\n");
-
 	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f36bd42..d031bad 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -22,6 +22,7 @@
 #include <asm/cpufeature.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include <asm/bootparam.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -90,7 +91,7 @@
 	
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
-	testb $(1<<6), BP_loadflags(%esi)
+	testb $KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz 2f
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6fd514d9..ae6588b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *  linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@@ -56,7 +56,7 @@
 	 * %rsi holds a physical pointer to real_mode_data.
 	 *
 	 * We come here either directly from a 64bit bootloader, or from
-	 * arch/x86_64/boot/compressed/head.S.
+	 * arch/x86/boot/compressed/head_64.S.
 	 *
 	 * We only come here initially at boot nothing else comes here.
 	 *
@@ -146,7 +146,7 @@
 	leaq	level2_kernel_pgt(%rip), %rdi
 	leaq	4096(%rdi), %r8
 	/* See if it is a valid page table entry */
-1:	testq	$1, 0(%rdi)
+1:	testb	$1, 0(%rdi)
 	jz	2f
 	addq	%rbp, 0(%rdi)
 	/* Go to the next page */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5651fc..367f39d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@
 		return false;
 
 	if (use_eager_fpu())
-		return __thread_has_fpu(current);
+		return true;
 
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
@@ -68,7 +68,7 @@
 static inline bool interrupted_user_mode(void)
 {
 	struct pt_regs *regs = get_irq_regs();
-	return regs && user_mode_vm(regs);
+	return regs && user_mode(regs);
 }
 
 /*
@@ -94,9 +94,10 @@
 
 	if (__thread_has_fpu(me)) {
 		__save_init_fpu(me);
-	} else if (!use_eager_fpu()) {
+	} else {
 		this_cpu_write(fpu_owner_task, NULL);
-		clts();
+		if (!use_eager_fpu())
+			clts();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@
 
 	if (__thread_has_fpu(me)) {
 		if (WARN_ON(restore_fpu_checking(me)))
-			drop_init_fpu(me);
+			fpu_reset_state(me);
 	} else if (!use_eager_fpu()) {
 		stts();
 	}
@@ -120,10 +121,13 @@
 {
 	preempt_disable();
 	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->thread.fpu_counter = 0;
+		if (use_eager_fpu()) {
+			__save_fpu(tsk);
+		} else {
+			__save_init_fpu(tsk);
+			__thread_fpu_end(tsk);
+		}
+	}
 	preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@
 		return;
 	}
 
+	memset(fpu->state, 0, xstate_size);
+
 	if (cpu_has_fxsr) {
 		fx_finit(&fpu->state->fxsave);
 	} else {
 		struct i387_fsave_struct *fp = &fpu->state->fsave;
-		memset(fp, 0, xstate_size);
 		fp->cwd = 0xffff037fu;
 		fp->swd = 0xffff0000u;
 		fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@
 	if (tsk_used_math(tsk)) {
 		if (cpu_has_fpu && tsk == current)
 			unlazy_fpu(tsk);
-		tsk->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(tsk);
 		return 0;
 	}
 
@@ -336,6 +341,7 @@
 		unsigned int pos, unsigned int count,
 		void *kbuf, void __user *ubuf)
 {
+	struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
 	int ret;
 
 	if (!cpu_has_xsave)
@@ -350,14 +356,12 @@
 	 * memory layout in the thread struct, so that we can copy the entire
 	 * xstateregs to the user using one user_regset_copyout().
 	 */
-	memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-	       xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+	memcpy(&xsave->i387.sw_reserved,
+		xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
 	/*
 	 * Copy the xstate memory layout.
 	 */
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpu.state->xsave, 0, -1);
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	return ret;
 }
 
@@ -365,8 +369,8 @@
 		  unsigned int pos, unsigned int count,
 		  const void *kbuf, const void __user *ubuf)
 {
+	struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
 	int ret;
-	struct xsave_hdr_struct *xsave_hdr;
 
 	if (!cpu_has_xsave)
 		return -ENODEV;
@@ -375,22 +379,16 @@
 	if (ret)
 		return ret;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.fpu.state->xsave, 0, -1);
-
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
-	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-	xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-	xsave_hdr->xstate_bv &= pcntxt_mask;
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+	xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
 	/*
 	 * These bits must be zero.
 	 */
-	memset(xsave_hdr->reserved, 0, 48);
-
+	memset(&xsave->xsave_hdr.reserved, 0, 48);
 	return ret;
 }
 
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 4ddaf66..37dae79 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -54,7 +54,7 @@
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 
 	if (turn_on)
 		bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 67b1cbe..e5952c2 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -295,7 +295,7 @@
 
 	this_cpu = smp_processor_id();
 	cpumask_copy(&online_new, cpu_online_mask);
-	cpu_clear(this_cpu, online_new);
+	cpumask_clear_cpu(this_cpu, &online_new);
 
 	this_count = 0;
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -307,7 +307,7 @@
 
 			data = irq_desc_get_irq_data(desc);
 			cpumask_copy(&affinity_new, data->affinity);
-			cpu_clear(this_cpu, affinity_new);
+			cpumask_clear_cpu(this_cpu, &affinity_new);
 
 			/* Do not count inactive or per-cpu irqs. */
 			if (!irq_has_action(irq) || irqd_is_per_cpu(data))
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 28d28f5..f9fd86a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -165,7 +165,7 @@
 	if (unlikely(!desc))
 		return false;
 
-	if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
 		desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index e4b503d..394e643 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -44,7 +44,7 @@
 	u64 estack_top, estack_bottom;
 	u64 curbase = (u64)task_stack_page(current);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return;
 
 	if (regs->sp >= curbase + sizeof(struct thread_info) +
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181e..cd10a64 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -178,7 +178,8 @@
 #endif
 	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+		set_intr_gate(i, irq_entries_start +
+				8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 #ifdef CONFIG_X86_LOCAL_APIC
 	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 25ecd56..d6178d9 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -126,11 +126,11 @@
 #ifdef CONFIG_X86_32
 	switch (regno) {
 	case GDB_SS:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = __KERNEL_DS;
 		break;
 	case GDB_SP:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = kernel_stack_pointer(regs);
 		break;
 	case GDB_GS:
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4e3d5a9..24d0796 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -602,7 +602,7 @@
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return 0;
 
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -1007,7 +1007,7 @@
 	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
 
-	if (args->regs && user_mode_vm(args->regs))
+	if (args->regs && user_mode(args->regs))
 		return ret;
 
 	if (val == DIE_GPF) {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b..005c03e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -33,6 +33,7 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
@@ -47,21 +48,13 @@
 
 #ifdef CONFIG_RANDOMIZE_BASE
 static unsigned long module_load_offset;
-static int randomize_modules = 1;
 
 /* Mutex protects the module_load_offset. */
 static DEFINE_MUTEX(module_kaslr_mutex);
 
-static int __init parse_nokaslr(char *p)
-{
-	randomize_modules = 0;
-	return 0;
-}
-early_param("nokaslr", parse_nokaslr);
-
 static unsigned long int get_module_load_offset(void)
 {
-	if (randomize_modules) {
+	if (kaslr_enabled()) {
 		mutex_lock(&module_kaslr_mutex);
 		/*
 		 * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 781861c..da8cb98 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -131,10 +131,11 @@
 	}
 
 	/*
-	 * RIP, flags, and the argument registers are usually saved.
-	 * orig_ax is probably okay, too.
+	 * These registers are always saved on 64-bit syscall entry.
+	 * On 32-bit entry points, they are saved too except r8..r11.
 	 */
 	regs_user_copy->ip = user_regs->ip;
+	regs_user_copy->ax = user_regs->ax;
 	regs_user_copy->cx = user_regs->cx;
 	regs_user_copy->dx = user_regs->dx;
 	regs_user_copy->si = user_regs->si;
@@ -145,9 +146,12 @@
 	regs_user_copy->r11 = user_regs->r11;
 	regs_user_copy->orig_ax = user_regs->orig_ax;
 	regs_user_copy->flags = user_regs->flags;
+	regs_user_copy->sp = user_regs->sp;
+	regs_user_copy->cs = user_regs->cs;
+	regs_user_copy->ss = user_regs->ss;
 
 	/*
-	 * Don't even try to report the "rest" regs.
+	 * Most system calls don't save these registers, don't report them.
 	 */
 	regs_user_copy->bx = -1;
 	regs_user_copy->bp = -1;
@@ -158,37 +162,13 @@
 
 	/*
 	 * For this to be at all useful, we need a reasonable guess for
-	 * sp and the ABI.  Be careful: we're in NMI context, and we're
+	 * the ABI.  Be careful: we're in NMI context, and we're
 	 * considering current to be the current task, so we should
 	 * be careful not to look at any other percpu variables that might
 	 * change during context switches.
 	 */
-	if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
-	    task_thread_info(current)->status & TS_COMPAT) {
-		/* Easy case: we're in a compat syscall. */
-		regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
-		regs_user_copy->sp = user_regs->sp;
-		regs_user_copy->cs = user_regs->cs;
-		regs_user_copy->ss = user_regs->ss;
-	} else if (user_regs->orig_ax != -1) {
-		/*
-		 * We're probably in a 64-bit syscall.
-		 * Warning: this code is severely racy.  At least it's better
-		 * than just blindly copying user_regs.
-		 */
-		regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
-		regs_user_copy->sp = this_cpu_read(old_rsp);
-		regs_user_copy->cs = __USER_CS;
-		regs_user_copy->ss = __USER_DS;
-		regs_user_copy->cx = -1;  /* usually contains garbage */
-	} else {
-		/* We're probably in an interrupt or exception. */
-		regs_user->abi = user_64bit_mode(user_regs) ?
-			PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
-		regs_user_copy->sp = user_regs->sp;
-		regs_user_copy->cs = user_regs->cs;
-		regs_user_copy->ss = user_regs->ss;
-	}
+	regs_user->abi = user_64bit_mode(user_regs) ?
+		PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
 
 	regs_user->regs = regs_user_copy;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 046e2d6..8213da6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/random.h>
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
@@ -24,6 +24,7 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
+#include <asm/mwait.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
@@ -37,7 +38,26 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+	.x86_tss = {
+		.sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+		.ss0 = __KERNEL_DS,
+		.ss1 = __KERNEL_CS,
+		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
+#endif
+	 },
+#ifdef CONFIG_X86_32
+	 /*
+	  * Note that the .io_bitmap member must be extra-big. This is because
+	  * the CPU will access an additional byte beyond the end of the IO
+	  * permission bitmap. The extra byte must be all 1 bits, and must
+	  * be within the limit.
+	  */
+	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -69,8 +89,8 @@
 
 	dst->thread.fpu_counter = 0;
 	dst->thread.fpu.has_fpu = 0;
-	dst->thread.fpu.last_cpu = ~0;
 	dst->thread.fpu.state = NULL;
+	task_disable_lazy_fpu_restore(dst);
 	if (tsk_used_math(src)) {
 		int err = fpu_alloc(&dst->thread.fpu);
 		if (err)
@@ -109,7 +129,7 @@
 	unsigned long *bp = t->io_bitmap_ptr;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+		struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
@@ -131,13 +151,18 @@
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_init_fpu(tsk);
-	/*
-	 * Free the FPU state for non xsave platforms. They get reallocated
-	 * lazily at the first use.
-	 */
-	if (!use_eager_fpu())
+
+	if (!use_eager_fpu()) {
+		/* FPU state will be reallocated lazily at the first use. */
+		drop_fpu(tsk);
 		free_thread_xstate(tsk);
+	} else if (!used_math()) {
+		/* kthread execs. TODO: cleanup this horror. */
+		if (WARN_ON(init_fpu(tsk)))
+			force_sig(SIGKILL, tsk);
+		user_fpu_begin();
+		restore_init_xstate();
+	}
 }
 
 static void hard_disable_TSC(void)
@@ -377,14 +402,11 @@
 
 		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
 			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-			/*
-			 * Force broadcast so ACPI can not interfere.
-			 */
-			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-					   &cpu);
+			/* Force broadcast so ACPI can not interfere. */
+			tick_broadcast_force();
 			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
 		}
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		tick_broadcast_enter();
 
 		default_idle();
 
@@ -393,12 +415,59 @@
 		 * called with interrupts disabled.
 		 */
 		local_irq_disable();
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		tick_broadcast_exit();
 		local_irq_enable();
 	} else
 		default_idle();
 }
 
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+	if (!current_set_polling_and_test()) {
+		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+			smp_mb(); /* quirk */
+			clflush((void *)&current_thread_info()->flags);
+			smp_mb(); /* quirk */
+		}
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
+	__current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
@@ -412,6 +481,9 @@
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
 		x86_idle = amd_e400_idle;
+	} else if (prefer_mwait_c1_over_halt(c)) {
+		pr_info("using mwait in idle threads\n");
+		x86_idle = mwait_idle;
 	} else
 		x86_idle = default_idle;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 603c4f9..8ed2106 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -73,7 +73,7 @@
 	unsigned long sp;
 	unsigned short ss, gs;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 		gs = get_user_gs(regs);
@@ -206,11 +206,7 @@
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
 	regs->flags		= X86_EFLAGS_IF;
-	/*
-	 * force it to the iret return path by making it look as if there was
-	 * some work pending.
-	 */
-	set_thread_flag(TIF_NOTIFY_RESUME);
+	force_iret();
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -248,7 +244,7 @@
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -256,11 +252,6 @@
 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
-	 * Reload esp0.
-	 */
-	load_sp0(tss, next);
-
-	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
 	 * stack on entry.  No need to save %es and %ds, as those are
 	 * always kernel segments while inside the kernel.  Doing this
@@ -310,9 +301,17 @@
 	 */
 	arch_end_context_switch(next_p);
 
+	/*
+	 * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+	 * current_thread_info().
+	 */
+	load_sp0(tss, next);
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
+	this_cpu_write(cpu_current_top_of_stack,
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 67fcc43..4baaa97 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.usersp = me->thread.usersp;
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -207,7 +206,7 @@
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (test_thread_flag(TIF_IA32))
+		if (is_ia32_task())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)childregs->si, 0);
 		else
@@ -235,13 +234,12 @@
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
-	current->thread.usersp	= new_sp;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	this_cpu_write(old_rsp, new_sp);
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
+	force_iret();
 }
 
 void
@@ -277,15 +275,12 @@
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu;
 
 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-	/* Reload esp0 and ss1. */
-	load_sp0(tss, next);
-
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
 	 *
@@ -401,8 +396,6 @@
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = this_cpu_read(old_rsp);
-	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
 
 	/*
@@ -413,9 +406,11 @@
 	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
 	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
+	/* Reload esp0 and ss1.  This changes current_thread_info(). */
+	load_sp0(tss, next);
+
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
@@ -602,6 +597,5 @@
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
-	return (test_tsk_thread_flag(task, TIF_IA32)) ?
-			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
+	return task_pt_regs(task)->sp;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e510618..a7bc794 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -364,18 +364,12 @@
 	case offsetof(struct user_regs_struct,cs):
 		if (unlikely(value == 0))
 			return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-		if (test_tsk_thread_flag(task, TIF_IA32))
-			task_pt_regs(task)->cs = value;
-#endif
+		task_pt_regs(task)->cs = value;
 		break;
 	case offsetof(struct user_regs_struct,ss):
 		if (unlikely(value == 0))
 			return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-		if (test_tsk_thread_flag(task, TIF_IA32))
-			task_pt_regs(task)->ss = value;
-#endif
+		task_pt_regs(task)->ss = value;
 		break;
 	}
 
@@ -1421,7 +1415,7 @@
 	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = si_code;
-	info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 }
 
 void user_single_step_siginfo(struct task_struct *tsk,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d2..e5ecd20 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+	if (!pvclock_vdso_info) {
+		BUG();
+		return NULL;
+	}
+
+	return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
 #ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+			        void *v)
+{
+	struct task_migration_notifier *mn = v;
+	struct pvclock_vsyscall_time_info *pvti;
+
+	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+	/* this is NULL when pvclock vsyscall is not initialized */
+	if (unlikely(pvti == NULL))
+		return NOTIFY_DONE;
+
+	pvti->migrate_count++;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+	.notifier_call = pvclock_task_migrate,
+};
+
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@
 
 	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
+	pvclock_vdso_info = i;
+
 	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
 		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 			     __pa(i) + (idx*PAGE_SIZE),
 			     PAGE_KERNEL_VVAR);
 	}
 
+
+	register_task_migration_notifier(&pvclock_migrate);
+
 	return 0;
 }
 #endif
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index e13f8e7..77630d5 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -226,23 +226,23 @@
 	movl	(%ebx), %ecx
 	addl	$4, %ebx
 1:
-	testl	$0x1,   %ecx  /* is it a destination page */
+	testb	$0x1, %cl     /* is it a destination page */
 	jz	2f
 	movl	%ecx,	%edi
 	andl	$0xfffff000, %edi
 	jmp     0b
 2:
-	testl	$0x2,	%ecx  /* is it an indirection page */
+	testb	$0x2, %cl    /* is it an indirection page */
 	jz	2f
 	movl	%ecx,	%ebx
 	andl	$0xfffff000, %ebx
 	jmp     0b
 2:
-	testl   $0x4,   %ecx /* is it the done indicator */
+	testb   $0x4, %cl    /* is it the done indicator */
 	jz      2f
 	jmp     3f
 2:
-	testl   $0x8,   %ecx /* is it the source indicator */
+	testb   $0x8, %cl    /* is it the source indicator */
 	jz      0b	     /* Ignore it otherwise */
 	movl    %ecx,   %esi /* For every source page do a copy */
 	andl    $0xfffff000, %esi
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 3fd2c69..98111b3 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -123,7 +123,7 @@
 	 * Set cr4 to a known state:
 	 *  - physical address extension enabled
 	 */
-	movq	$X86_CR4_PAE, %rax
+	movl	$X86_CR4_PAE, %eax
 	movq	%rax, %cr4
 
 	jmp 1f
@@ -221,23 +221,23 @@
 	movq	(%rbx), %rcx
 	addq	$8,	%rbx
 1:
-	testq	$0x1,	%rcx  /* is it a destination page? */
+	testb	$0x1,	%cl   /* is it a destination page? */
 	jz	2f
 	movq	%rcx,	%rdi
 	andq	$0xfffffffffffff000, %rdi
 	jmp	0b
 2:
-	testq	$0x2,	%rcx  /* is it an indirection page? */
+	testb	$0x2,	%cl   /* is it an indirection page? */
 	jz	2f
 	movq	%rcx,   %rbx
 	andq	$0xfffffffffffff000, %rbx
 	jmp	0b
 2:
-	testq	$0x4,	%rcx  /* is it the done indicator? */
+	testb	$0x4,	%cl   /* is it the done indicator? */
 	jz	2f
 	jmp	3f
 2:
-	testq	$0x8,	%rcx  /* is it the source indicator? */
+	testb	$0x8,	%cl   /* is it the source indicator? */
 	jz	0b	      /* Ignore it otherwise */
 	movq	%rcx,   %rsi  /* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
@@ -246,17 +246,17 @@
 	movq	%rsi, %rax
 
 	movq	%r10, %rdi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	movq	%rax, %rdi
 	movq	%rdx, %rsi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	movq	%rdx, %rdi
 	movq	%r10, %rsi
-	movq	$512,   %rcx
+	movl	$512, %ecx
 	rep ; movsq
 
 	lea	PAGE_SIZE(%rax), %rsi
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a2421c..d74ac33 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -354,7 +354,7 @@
 		mapaddr = ramdisk_image & PAGE_MASK;
 		p = early_memremap(mapaddr, clen+slop);
 		memcpy(q, p+slop, clen);
-		early_iounmap(p, clen+slop);
+		early_memunmap(p, clen+slop);
 		q += clen;
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
@@ -438,7 +438,7 @@
 		data_len = data->len + sizeof(struct setup_data);
 		data_type = data->type;
 		pa_next = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 
 		switch (data_type) {
 		case SETUP_E820_EXT:
@@ -470,7 +470,7 @@
 			 E820_RAM, E820_RESERVED_KERN);
 		found = 1;
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 	if (!found)
 		return;
@@ -491,7 +491,7 @@
 		data = early_memremap(pa_data, sizeof(*data));
 		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
+		early_memunmap(data, sizeof(*data));
 	}
 }
 
@@ -832,10 +832,15 @@
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-	pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
-		 "(relocation range: 0x%lx-0x%lx)\n",
-		 (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
-		 __START_KERNEL_map, MODULES_VADDR-1);
+	if (kaslr_enabled()) {
+		pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
+			 (unsigned long)&_text - __START_KERNEL,
+			 __START_KERNEL,
+			 __START_KERNEL_map,
+			 MODULES_VADDR-1);
+	} else {
+		pr_emerg("Kernel Offset: disabled\n");
+	}
 
 	return 0;
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e504246..3e58186 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,8 +61,7 @@
 	regs->seg = GET_SEG(seg) | 3;			\
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-		       unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
 	void __user *buf;
 	unsigned int tmpflags;
@@ -81,7 +80,7 @@
 #endif /* CONFIG_X86_32 */
 
 		COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-		COPY(dx); COPY(cx); COPY(ip);
+		COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 
 #ifdef CONFIG_X86_64
 		COPY(r8);
@@ -94,27 +93,20 @@
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-		/* Kernel saves and restores only the CS segment register on signals,
-		 * which is the bare minimum needed to allow mixed 32/64-bit code.
-		 * App's signal handler can save/restore other segments if needed. */
-		COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_ax = -1;		/* disable syscall checks */
 
 		get_user_ex(buf, &sc->fpstate);
-
-		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
 	err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
+	force_iret();
+
 	return err;
 }
 
@@ -162,8 +154,9 @@
 #else /* !CONFIG_X86_32 */
 		put_user_ex(regs->flags, &sc->flags);
 		put_user_ex(regs->cs, &sc->cs);
-		put_user_ex(0, &sc->gs);
-		put_user_ex(0, &sc->fs);
+		put_user_ex(0, &sc->__pad2);
+		put_user_ex(0, &sc->__pad1);
+		put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -457,9 +450,19 @@
 
 	regs->sp = (unsigned long)frame;
 
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
+	/*
+	 * Set up the CS and SS registers to run signal handlers in
+	 * 64-bit mode, even if the handler happens to be interrupting
+	 * 32-bit or 16-bit code.
+	 *
+	 * SS is subtle.  In 64-bit mode, we don't need any particular
+	 * SS descriptor, but we do need SS to be valid.  It's possible
+	 * that the old SS is entirely bogus -- this can happen if the
+	 * signal we're trying to deliver is #GP or #SS caused by a bad
+	 * SS value.
+	 */
 	regs->cs = __USER_CS;
+	regs->ss = __USER_DS;
 
 	return 0;
 }
@@ -539,7 +542,6 @@
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe __user *frame;
-	unsigned long ax;
 	sigset_t set;
 
 	frame = (struct sigframe __user *)(regs->sp - 8);
@@ -553,9 +555,9 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->sc, &ax))
+	if (restore_sigcontext(regs, &frame->sc))
 		goto badframe;
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "sigreturn");
@@ -568,7 +570,6 @@
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
-	unsigned long ax;
 	sigset_t set;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@@ -579,13 +580,13 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "rt_sigreturn");
@@ -679,7 +680,7 @@
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
 		if (used_math())
-			drop_init_fpu(current);
+			fpu_reset_state(current);
 	}
 	signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
@@ -780,7 +781,6 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
-	unsigned long ax;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -791,13 +791,13 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "x32 rt_sigreturn");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aa..7035f6b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -779,6 +779,26 @@
 	return boot_error;
 }
 
+void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	/* Just in case we booted with a single CPU. */
+	alternatives_enable_smp();
+
+	per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+	/* Stack for startup_32 can be just as for start_secondary onwards */
+	irq_ctx_init(cpu);
+	per_cpu(cpu_current_top_of_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+#else
+	clear_tsk_thread_flag(idle, TIF_FORK);
+	initial_gs = per_cpu_offset(cpu);
+#endif
+	per_cpu(kernel_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+}
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -796,23 +816,9 @@
 	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
-	/* Just in case we booted with a single CPU. */
-	alternatives_enable_smp();
-
 	idle->thread.sp = (unsigned long) (((struct pt_regs *)
 			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
-	per_cpu(current_task, cpu) = idle;
 
-#ifdef CONFIG_X86_32
-	/* Stack for startup_32 can be just as for start_secondary onwards */
-	irq_ctx_init(cpu);
-#else
-	clear_tsk_thread_flag(idle, TIF_FORK);
-	initial_gs = per_cpu_offset(cpu);
-#endif
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
@@ -953,6 +959,8 @@
 	/* the FPU context is blank, nobody can own it */
 	__cpu_disable_lazy_restore(cpu);
 
+	common_cpu_up(cpu, tidle);
+
 	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
@@ -1086,8 +1094,6 @@
 		return SMP_NO_APIC;
 	}
 
-	verify_local_APIC();
-
 	/*
 	 * If SMP should be disabled, then really disable it!
 	 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 30277e2..10e0272 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -34,10 +34,26 @@
 	return va_align.mask;
 }
 
+/*
+ * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
+ * va_align.bits, [12:upper_bit), are set to a random value instead of
+ * zeroing them. This random value is computed once per boot. This form
+ * of ASLR is known as "per-boot ASLR".
+ *
+ * To achieve this, the random value is added to the info.align_offset
+ * value before calling vm_unmapped_area() or ORed directly to the
+ * address.
+ */
+static unsigned long get_align_bits(void)
+{
+	return va_align.bits & get_align_mask();
+}
+
 unsigned long align_vdso_addr(unsigned long addr)
 {
 	unsigned long align_mask = get_align_mask();
-	return (addr + align_mask) & ~align_mask;
+	addr = (addr + align_mask) & ~align_mask;
+	return addr | get_align_bits();
 }
 
 static int __init control_va_addr_alignment(char *str)
@@ -135,8 +151,12 @@
 	info.length = len;
 	info.low_limit = begin;
 	info.high_limit = end;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	return vm_unmapped_area(&info);
 }
 
@@ -174,8 +194,12 @@
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = mm->mmap_base;
-	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	if (filp) {
+		info.align_mask = get_align_mask();
+		info.align_offset += get_align_bits();
+	}
 	addr = vm_unmapped_area(&info);
 	if (!(addr & ~PAGE_MASK))
 		return addr;
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
index e9bcd57..3777189 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/kernel/syscall_32.c
@@ -5,21 +5,29 @@
 #include <linux/cache.h>
 #include <asm/asm-offsets.h>
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#ifdef CONFIG_IA32_EMULATION
+#define SYM(sym, compat) compat
+#else
+#define SYM(sym, compat) sym
+#define ia32_sys_call_table sys_call_table
+#define __NR_ia32_syscall_max __NR_syscall_max
+#endif
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
 
 typedef asmlinkage void (*sys_call_ptr_t)(void);
 
 extern asmlinkage void sys_ni_syscall(void);
 
-__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
 	/*
 	 * Smells like a compiler bug -- it doesn't work
 	 * when the & below is removed.
 	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
+	[0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25adc0e..d39c091 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -30,7 +30,7 @@
 {
 	unsigned long pc = instruction_pointer(regs);
 
-	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+	if (!user_mode(regs) && in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
 		return *(unsigned long *)(regs->bp + sizeof(long));
 #else
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4ff5d16..f4fa991 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -112,7 +112,7 @@
 {
 	enum ctx_state prev_state;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		/* Other than that, we're just an exception. */
 		prev_state = exception_enter();
 	} else {
@@ -146,7 +146,7 @@
 	/* Must be before exception_exit. */
 	preempt_count_sub(HARDIRQ_OFFSET);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return exception_exit(prev_state);
 	else
 		rcu_nmi_exit();
@@ -158,7 +158,7 @@
  *
  * IST exception handlers normally cannot schedule.  As a special
  * exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
  * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
  * begins a non-atomic section within an ist_enter()/ist_exit() region.
  * Callers are responsible for enabling interrupts themselves inside
@@ -167,15 +167,15 @@
  */
 void ist_begin_non_atomic(struct pt_regs *regs)
 {
-	BUG_ON(!user_mode_vm(regs));
+	BUG_ON(!user_mode(regs));
 
 	/*
 	 * Sanity check: we need to be on the normal thread stack.  This
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
-		& ~(THREAD_SIZE - 1)) != 0);
+	BUG_ON((unsigned long)(current_top_of_stack() -
+			       current_stack_pointer()) >= THREAD_SIZE);
 
 	preempt_count_sub(HARDIRQ_OFFSET);
 }
@@ -194,8 +194,7 @@
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 		  struct pt_regs *regs,	long error_code)
 {
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		/*
 		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 		 * On nmi (interrupt 2), do_trap should not be called.
@@ -207,7 +206,7 @@
 		}
 		return -1;
 	}
-#endif
+
 	if (!user_mode(regs)) {
 		if (!fixup_exception(regs)) {
 			tsk->thread.error_code = error_code;
@@ -384,7 +383,7 @@
 		goto exit;
 	conditional_sti(regs);
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 		die("bounds", regs, error_code);
 
 	if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -462,13 +461,11 @@
 	prev_state = exception_enter();
 	conditional_sti(regs);
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 		goto exit;
 	}
-#endif
 
 	tsk = current;
 	if (!user_mode(regs)) {
@@ -587,7 +584,7 @@
 	/* Copy the remainder of the stack from the current stack. */
 	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
-	BUG_ON(!user_mode_vm(&new_stack->regs));
+	BUG_ON(!user_mode(&new_stack->regs));
 	return new_stack;
 }
 NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -637,7 +634,7 @@
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
 	 */
-	if (!dr6 && user_mode_vm(regs))
+	if (!dr6 && user_mode(regs))
 		user_icebp = 1;
 
 	/* Catch kmemcheck conditions first of all! */
@@ -673,7 +670,7 @@
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
 		preempt_conditional_cli(regs);
@@ -721,7 +718,7 @@
 		return;
 	conditional_sti(regs);
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 	{
 		if (!fixup_exception(regs)) {
 			task->thread.error_code = error_code;
@@ -734,7 +731,7 @@
 	/*
 	 * Save the info for the exception handler and clear the error.
 	 */
-	save_init_fpu(task);
+	unlazy_fpu(task);
 	task->thread.trap_nr = trapnr;
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
@@ -863,7 +860,7 @@
 	kernel_fpu_disable();
 	__thread_fpu_begin(tsk);
 	if (unlikely(restore_fpu_checking(tsk))) {
-		drop_init_fpu(tsk);
+		fpu_reset_state(tsk);
 		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
 	} else {
 		tsk->thread.fpu_counter++;
@@ -925,9 +922,21 @@
 /* Set of traps needed for early debugging. */
 void __init early_trap_init(void)
 {
-	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+	/*
+	 * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+	 * is ready in cpu_init() <-- trap_init(). Before trap_init(),
+	 * CPU runs at ring 0 so it is impossible to hit an invalid
+	 * stack.  Using the original stack works well enough at this
+	 * early stage. DEBUG_STACK will be equipped after cpu_init() in
+	 * trap_init().
+	 *
+	 * We don't need to set trace_idt_table like set_intr_gate(),
+	 * since we don't have trace_debug and it will be reset to
+	 * 'debug' in trap_init() by set_intr_gate_ist().
+	 */
+	set_intr_gate_notrace(X86_TRAP_DB, debug);
 	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+	set_system_intr_gate(X86_TRAP_BP, &int3);
 #ifdef CONFIG_X86_32
 	set_intr_gate(X86_TRAP_PF, page_fault);
 #endif
@@ -1005,6 +1014,15 @@
 	 */
 	cpu_init();
 
+	/*
+	 * X86_TRAP_DB and X86_TRAP_BP have been set
+	 * in early_trap_init(). However, ITS works only after
+	 * cpu_init() loads TSS. See comments in early_trap_init().
+	 */
+	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+	/* int3 can be called from all */
+	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 81f8adb0..0b81ad6 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -912,7 +912,7 @@
 	int ret = NOTIFY_DONE;
 
 	/* We are only interested in userspace traps */
-	if (regs && !user_mode_vm(regs))
+	if (regs && !user_mode(regs))
 		return NOTIFY_DONE;
 
 	switch (val) {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e8edcf5..fc9db6e 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -150,7 +150,7 @@
 		do_exit(SIGSEGV);
 	}
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 	current->thread.sp0 = current->thread.saved_sp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = &per_cpu(cpu_tss, get_cpu());
 	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f..51e3304 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@
 	gtod_write_begin(vdata);
 
 	/* copy vsyscall data */
-	vdata->vclock_mode	= tk->tkr.clock->archdata.vclock_mode;
-	vdata->cycle_last	= tk->tkr.cycle_last;
-	vdata->mask		= tk->tkr.mask;
-	vdata->mult		= tk->tkr.mult;
-	vdata->shift		= tk->tkr.shift;
+	vdata->vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->cycle_last	= tk->tkr_mono.cycle_last;
+	vdata->mask		= tk->tkr_mono.mask;
+	vdata->mult		= tk->tkr_mono.mult;
+	vdata->shift		= tk->tkr_mono.shift;
 
 	vdata->wall_time_sec		= tk->xtime_sec;
-	vdata->wall_time_snsec		= tk->tkr.xtime_nsec;
+	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
 
 	vdata->monotonic_time_sec	= tk->xtime_sec
 					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->tkr.xtime_nsec
+	vdata->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
 					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr.shift);
+						<< tk->tkr_mono.shift);
 	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
 		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr.shift;
+					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
 		vdata->monotonic_time_sec++;
 	}
 
 	vdata->wall_time_coarse_sec	= tk->xtime_sec;
-	vdata->wall_time_coarse_nsec	= (long)(tk->tkr.xtime_nsec >>
-						 tk->tkr.shift);
+	vdata->wall_time_coarse_nsec	= (long)(tk->tkr_mono.xtime_nsec >>
+						 tk->tkr_mono.shift);
 
 	vdata->monotonic_time_coarse_sec =
 		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index cdc6cf9..87a815b 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -342,7 +342,7 @@
 			 config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		drop_init_fpu(tsk);
+		fpu_reset_state(tsk);
 		return 0;
 	}
 
@@ -416,7 +416,7 @@
 		 */
 		user_fpu_begin();
 		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-			drop_init_fpu(tsk);
+			fpu_reset_state(tsk);
 			return -1;
 		}
 	}
@@ -678,19 +678,13 @@
 	this_func();
 }
 
-static inline void __init eager_fpu_init_bp(void)
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf will be unset only once during boot.
+ */
+void __init_refok eager_fpu_init(void)
 {
-	current->thread.fpu.state =
-	    alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-	if (!init_xstate_buf)
-		setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
-{
-	static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
-	clear_used_math();
+	WARN_ON(used_math());
 	current_thread_info()->status = 0;
 
 	if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@
 		return;
 	}
 
-	if (boot_func) {
-		boot_func();
-		boot_func = NULL;
-	}
-
-	/*
-	 * This is same as math_state_restore(). But use_xsave() is
-	 * not yet patched to use math_state_restore().
-	 */
-	init_fpu(current);
-	__thread_fpu_begin(current);
-	if (cpu_has_xsave)
-		xrstor_state(init_xstate_buf, -1);
-	else
-		fxrstor_checking(&init_xstate_buf->i387);
+	if (!init_xstate_buf)
+		setup_init_fpu_buf();
 }
 
 /*
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 08f790d..16e8f96 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,5 +1,5 @@
 
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
 
 CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a80737..59b69f6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -104,6 +104,9 @@
 		((best->eax & 0xff00) >> 8) != 0)
 		return -EINVAL;
 
+	/* Update physical-address width */
+	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
 	kvm_pmu_cpuid_update(vcpu);
 	return 0;
 }
@@ -135,6 +138,21 @@
 	}
 }
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+	if (!best || best->eax < 0x80000008)
+		goto not_found;
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+not_found:
+	return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@
 }
 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
-	if (!best || best->eax < 0x80000008)
-		goto not_found;
-	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-	if (best)
-		return best->eax & 0xff;
-not_found:
-	return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
 /*
  * If no match is found, check whether we exceed the vCPU's limit
  * and return the content of the highest valid _standard_ leaf instead.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 4452eed..c3b1ad9 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -20,13 +20,19 @@
 			      struct kvm_cpuid_entry2 __user *entries);
 void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.maxphyaddr;
+}
 
 static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 
 	if (!static_cpu_has(X86_FEATURE_XSAVE))
-		return 0;
+		return false;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 106c015..630bcb0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -248,27 +248,7 @@
 	struct opcode mode64;
 };
 
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
 
 enum x86_transfer_type {
 	X86_TRANSFER_NONE,
@@ -317,7 +297,8 @@
  * These EFLAGS bits are restored from saved value during emulation, and
  * any changes are written back to the saved value after emulation.
  */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+		     X86_EFLAGS_PF|X86_EFLAGS_CF)
 
 #ifdef CONFIG_X86_64
 #define ON64(x) x
@@ -478,6 +459,25 @@
 	*dest = (*dest & ~mask) | (src & mask);
 }
 
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+	switch (bytes) {
+	case 1:
+		*(u8 *)reg = (u8)val;
+		break;
+	case 2:
+		*(u16 *)reg = (u16)val;
+		break;
+	case 4:
+		*reg = (u32)val;
+		break;	/* 64b: zero-extend */
+	case 8:
+		*reg = val;
+		break;
+	}
+}
+
 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
 {
 	return (1UL << (ctxt->ad_bytes << 3)) - 1;
@@ -943,6 +943,22 @@
 
 FASTOP2R(cmp, cmp_r);
 
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+	/* If src is zero, do not writeback, but update flags */
+	if (ctxt->src.val == 0)
+		ctxt->dst.type = OP_NONE;
+	return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+	/* If src is zero, do not writeback, but update flags */
+	if (ctxt->src.val == 0)
+		ctxt->dst.type = OP_NONE;
+	return fastop(ctxt, em_bsr);
+}
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1399,7 +1415,7 @@
 		unsigned int in_page, n;
 		unsigned int count = ctxt->rep_prefix ?
 			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
-		in_page = (ctxt->eflags & EFLG_DF) ?
+		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
 			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
 			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
 		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@
 	}
 
 	if (ctxt->rep_prefix && (ctxt->d & String) &&
-	    !(ctxt->eflags & EFLG_DF)) {
+	    !(ctxt->eflags & X86_EFLAGS_DF)) {
 		ctxt->dst.data = rc->data + rc->pos;
 		ctxt->dst.type = OP_MEM_STR;
 		ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@
 
 static void write_register_operand(struct operand *op)
 {
-	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-	switch (op->bytes) {
-	case 1:
-		*(u8 *)op->addr.reg = (u8)op->val;
-		break;
-	case 2:
-		*(u16 *)op->addr.reg = (u16)op->val;
-		break;
-	case 4:
-		*op->addr.reg = (u32)op->val;
-		break;	/* 64b: zero-extend */
-	case 8:
-		*op->addr.reg = op->val;
-		break;
-	}
+	return assign_register(op->addr.reg, op->val, op->bytes);
 }
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@
 {
 	int rc;
 	unsigned long val, change_mask;
-	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
 	int cpl = ctxt->ops->cpl(ctxt);
 
 	rc = emulate_pop(ctxt, &val, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+		      X86_EFLAGS_AC | X86_EFLAGS_ID;
 
 	switch(ctxt->mode) {
 	case X86EMUL_MODE_PROT64:
 	case X86EMUL_MODE_PROT32:
 	case X86EMUL_MODE_PROT16:
 		if (cpl == 0)
-			change_mask |= EFLG_IOPL;
+			change_mask |= X86_EFLAGS_IOPL;
 		if (cpl <= iopl)
-			change_mask |= EFLG_IF;
+			change_mask |= X86_EFLAGS_IF;
 		break;
 	case X86EMUL_MODE_VM86:
 		if (iopl < 3)
 			return emulate_gp(ctxt, 0);
-		change_mask |= EFLG_IF;
+		change_mask |= X86_EFLAGS_IF;
 		break;
 	default: /* real mode */
-		change_mask |= (EFLG_IOPL | EFLG_IF);
+		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
 		break;
 	}
 
@@ -1918,7 +1922,7 @@
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
 	return em_push(ctxt);
 }
 
@@ -1926,6 +1930,7 @@
 {
 	int rc = X86EMUL_CONTINUE;
 	int reg = VCPU_REGS_RDI;
+	u32 val;
 
 	while (reg >= VCPU_REGS_RAX) {
 		if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
+		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
 		--reg;
 	}
 	return rc;
@@ -1956,7 +1962,7 @@
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
 
 	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
 	rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@
 	unsigned long temp_eip = 0;
 	unsigned long temp_eflags = 0;
 	unsigned long cs = 0;
-	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
-			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
-			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
-	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+			     X86_EFLAGS_AC | X86_EFLAGS_ID |
+			     X86_EFLAGS_FIXED;
+	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+				  X86_EFLAGS_VIP;
 
 	/* TODO: Add stack limit check */
 
@@ -2054,7 +2064,6 @@
 
 	ctxt->_eip = temp_eip;
 
-
 	if (ctxt->op_bytes == 4)
 		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
 	else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@
 	}
 
 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
-	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+	ctxt->eflags |= X86_EFLAGS_FIXED;
 	ctxt->ops->set_nmi_mask(ctxt, false);
 
 	return rc;
@@ -2145,12 +2154,12 @@
 	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
 		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
 		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
-		ctxt->eflags &= ~EFLG_ZF;
+		ctxt->eflags &= ~X86_EFLAGS_ZF;
 	} else {
 		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
 			(u32) reg_read(ctxt, VCPU_REGS_RBX);
 
-		ctxt->eflags |= EFLG_ZF;
+		ctxt->eflags |= X86_EFLAGS_ZF;
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2222,7 +2231,7 @@
 	ctxt->src.val = ctxt->dst.orig_val;
 	fastop(ctxt, em_cmp);
 
-	if (ctxt->eflags & EFLG_ZF) {
+	if (ctxt->eflags & X86_EFLAGS_ZF) {
 		/* Success: write back to memory; no update of EAX */
 		ctxt->src.type = OP_NONE;
 		ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~msr_data;
-		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+		ctxt->eflags |= X86_EFLAGS_FIXED;
 #endif
 	} else {
 		/* legacy mode */
 		ops->get_msr(ctxt, MSR_STAR, &msr_data);
 		ctxt->_eip = (u32)msr_data;
 
-		ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
 	return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@
 	if ((msr_data & 0xfffc) == 0x0)
 		return emulate_gp(ctxt, 0);
 
-	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-	cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
 	ss_sel = cs_sel + 8;
 	if (efer & EFER_LMA) {
 		cs.d = 0;
@@ -2493,8 +2502,8 @@
 			return emulate_gp(ctxt, 0);
 		break;
 	}
-	cs_sel |= SELECTOR_RPL_MASK;
-	ss_sel |= SELECTOR_RPL_MASK;
+	cs_sel |= SEGMENT_RPL_MASK;
+	ss_sel |= SEGMENT_RPL_MASK;
 
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@
 		return false;
 	if (ctxt->mode == X86EMUL_MODE_VM86)
 		return true;
-	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
 	return ctxt->ops->cpl(ctxt) > iopl;
 }
 
@@ -2782,10 +2791,8 @@
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
 					X86_TRANSFER_TASK_SWITCH, NULL);
-	if (ret != X86EMUL_CONTINUE)
-		return ret;
 
-	return X86EMUL_CONTINUE;
+	return ret;
 }
 
 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 		struct operand *op)
 {
-	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
 
 	register_address_increment(ctxt, reg, df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@
 	return X86EMUL_CONTINUE;
 }
 
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = ctxt->ops->fix_hypercall(ctxt);
 
@@ -3395,17 +3402,6 @@
 	return em_lgdt_lidt(ctxt, true);
 }
 
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
-	int rc;
-
-	rc = ctxt->ops->fix_hypercall(ctxt);
-
-	/* Disable writeback. */
-	ctxt->dst.type = OP_NONE;
-	return rc;
-}
-
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
 	return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@
 {
 	u32 flags;
 
-	flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+		X86_EFLAGS_SF;
 	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
 
 	ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@
 
 static const struct opcode group7_rm0[] = {
 	N,
-	I(SrcNone | Priv | EmulateOnUD,	em_vmcall),
+	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
 	N, N, N, N, N, N,
 };
 
@@ -3781,7 +3778,7 @@
 
 static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
-	II(SrcNone  | Prot | EmulateOnUD,	em_vmmcall,	vmmcall),
+	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
 	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
@@ -4192,7 +4189,8 @@
 	N, N,
 	G(BitOp, group8),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
-	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+	I(DstReg | SrcMem | ModRM, em_bsf_c),
+	I(DstReg | SrcMem | ModRM, em_bsr_c),
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
 	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@
 	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
 	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
 	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
-		 ((ctxt->eflags & EFLG_ZF) == 0))
+		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
 		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
-		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
 		return true;
 
 	return false;
@@ -4913,7 +4911,7 @@
 			/* All REP prefixes have the same first termination condition */
 			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
 				ctxt->eip = ctxt->_eip;
-				ctxt->eflags &= ~EFLG_RF;
+				ctxt->eflags &= ~X86_EFLAGS_RF;
 				goto done;
 			}
 		}
@@ -4963,9 +4961,9 @@
 	}
 
 	if (ctxt->rep_prefix && (ctxt->d & String))
-		ctxt->eflags |= EFLG_RF;
+		ctxt->eflags |= X86_EFLAGS_RF;
 	else
-		ctxt->eflags &= ~EFLG_RF;
+		ctxt->eflags &= ~X86_EFLAGS_RF;
 
 	if (ctxt->execute) {
 		if (ctxt->d & Fastop) {
@@ -5014,7 +5012,7 @@
 		rc = emulate_int(ctxt, ctxt->src.val);
 		break;
 	case 0xce:		/* into */
-		if (ctxt->eflags & EFLG_OF)
+		if (ctxt->eflags & X86_EFLAGS_OF)
 			rc = emulate_int(ctxt, 4);
 		break;
 	case 0xe9: /* jmp rel */
@@ -5027,19 +5025,19 @@
 		break;
 	case 0xf5:	/* cmc */
 		/* complement carry flag from eflags reg */
-		ctxt->eflags ^= EFLG_CF;
+		ctxt->eflags ^= X86_EFLAGS_CF;
 		break;
 	case 0xf8: /* clc */
-		ctxt->eflags &= ~EFLG_CF;
+		ctxt->eflags &= ~X86_EFLAGS_CF;
 		break;
 	case 0xf9: /* stc */
-		ctxt->eflags |= EFLG_CF;
+		ctxt->eflags |= X86_EFLAGS_CF;
 		break;
 	case 0xfc: /* cld */
-		ctxt->eflags &= ~EFLG_DF;
+		ctxt->eflags &= ~X86_EFLAGS_DF;
 		break;
 	case 0xfd: /* std */
-		ctxt->eflags |= EFLG_DF;
+		ctxt->eflags |= X86_EFLAGS_DF;
 		break;
 	default:
 		goto cannot_emulate;
@@ -5100,7 +5098,7 @@
 			}
 			goto done; /* skip rip writeback */
 		}
-		ctxt->eflags &= ~EFLG_RF;
+		ctxt->eflags &= ~X86_EFLAGS_RF;
 	}
 
 	ctxt->eip = ctxt->_eip;
@@ -5137,8 +5135,7 @@
 	case 0x40 ... 0x4f:	/* cmov */
 		if (test_cc(ctxt->b, ctxt->eflags))
 			ctxt->dst.val = ctxt->src.val;
-		else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
-			 ctxt->op_bytes != 4)
+		else if (ctxt->op_bytes != 4)
 			ctxt->dst.type = OP_NONE; /* no writeback */
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 298781d..4dce6f8 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -443,7 +443,8 @@
 		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
 }
 
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+				struct kvm_io_device *this,
 			    gpa_t addr, int len, const void *data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@
 	return 0;
 }
 
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+			   struct kvm_io_device *this,
 			   gpa_t addr, int len, void *data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@
 	return 0;
 }
 
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+				struct kvm_io_device *this,
 				gpa_t addr, int len, const void *data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@
 	return 0;
 }
 
-static int speaker_ioport_read(struct kvm_io_device *this,
-			       gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+				   struct kvm_io_device *this,
+				   gpa_t addr, int len, void *data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b..c84990b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,7 +3,7 @@
 
 #include <linux/kthread.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm_kpit_channel_state {
 	u32 count; /* can be 65536 */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 9541ba3..fef922f 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -529,42 +529,42 @@
 	return 0;
 }
 
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			       gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_master),
 			    addr, len, val);
 }
 
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			      gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_master),
 			    addr, len, val);
 }
 
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			      gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
 			    addr, len, val);
 }
 
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			     gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
 			    addr, len, val);
 }
 
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			     gpa_t addr, int len, const void *val)
 {
 	return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
 			    addr, len, val);
 }
 
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 			    gpa_t addr, int len, void *val)
 {
 	return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 46d4449..28146f0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -206,6 +206,8 @@
 
 	old_irr = ioapic->irr;
 	ioapic->irr |= mask;
+	if (edge)
+		ioapic->irr_delivered &= ~mask;
 	if ((edge && old_irr == ioapic->irr) ||
 	    (!edge && entry.fields.remote_irr)) {
 		ret = 0;
@@ -349,7 +351,7 @@
 	irqe.shorthand = 0;
 
 	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
-		ioapic->irr &= ~(1 << irq);
+		ioapic->irr_delivered |= 1 << irq;
 
 	if (irq == RTC_GSI && line_status) {
 		/*
@@ -473,13 +475,6 @@
 	}
 }
 
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	smp_rmb();
-	return test_bit(vector, ioapic->handled_vectors);
-}
-
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -500,8 +495,8 @@
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-			    void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+				gpa_t addr, int len, void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 result;
@@ -543,8 +538,8 @@
 	return 0;
 }
 
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			     const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
@@ -599,6 +594,7 @@
 	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
+	ioapic->irr_delivered = 0;
 	ioapic->id = 0;
 	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	rtc_irq_eoi_tracking_reset(ioapic);
@@ -656,6 +652,7 @@
 
 	spin_lock(&ioapic->lock);
 	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+	state->irr &= ~ioapic->irr_delivered;
 	spin_unlock(&ioapic->lock);
 	return 0;
 }
@@ -669,6 +666,7 @@
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	ioapic->irr = 0;
+	ioapic->irr_delivered = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
 	kvm_ioapic_inject_all(ioapic, state->irr);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index c2e36d9..ca0b0b4 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,7 +3,7 @@
 
 #include <linux/kvm_host.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm;
 struct kvm_vcpu;
@@ -77,6 +77,7 @@
 	struct rtc_status rtc_status;
 	struct delayed_work eoi_inject;
 	u32 irq_eoi[IOAPIC_NUM_PINS];
+	u32 irr_delivered;
 };
 
 #ifdef DEBUG
@@ -97,13 +98,19 @@
 	return kvm->arch.vioapic;
 }
 
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	smp_rmb();
+	return test_bit(vector, ioapic->handled_vectors);
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 			int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2d03568..ad68c73 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -27,7 +27,7 @@
 #include <linux/kvm_host.h>
 #include <linux/spinlock.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 #include "ioapic.h"
 #include "lapic.h"
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4ee827d..d67206a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -133,6 +133,28 @@
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time.  (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+	return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+	unsigned lid_bits;
+
+	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
+	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
+	BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
+	lid_bits = map->mode;
+
+	*cid = dest_id >> lid_bits;
+	*lid = dest_id & ((1 << lid_bits) - 1);
+}
+
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -146,48 +168,6 @@
 	if (!new)
 		goto out;
 
-	new->ldr_bits = 8;
-	/* flat mode is default */
-	new->cid_shift = 8;
-	new->cid_mask = 0;
-	new->lid_mask = 0xff;
-	new->broadcast = APIC_BROADCAST;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		struct kvm_lapic *apic = vcpu->arch.apic;
-
-		if (!kvm_apic_present(vcpu))
-			continue;
-
-		if (apic_x2apic_mode(apic)) {
-			new->ldr_bits = 32;
-			new->cid_shift = 16;
-			new->cid_mask = new->lid_mask = 0xffff;
-			new->broadcast = X2APIC_BROADCAST;
-		} else if (kvm_apic_get_reg(apic, APIC_LDR)) {
-			if (kvm_apic_get_reg(apic, APIC_DFR) ==
-							APIC_DFR_CLUSTER) {
-				new->cid_shift = 4;
-				new->cid_mask = 0xf;
-				new->lid_mask = 0xf;
-			} else {
-				new->cid_shift = 8;
-				new->cid_mask = 0;
-				new->lid_mask = 0xff;
-			}
-		}
-
-		/*
-		 * All APICs have to be configured in the same mode by an OS.
-		 * We take advatage of this while building logical id loockup
-		 * table. After reset APICs are in software disabled mode, so if
-		 * we find apic with different setting we assume this is the mode
-		 * OS wants all apics to be in; build lookup table accordingly.
-		 */
-		if (kvm_apic_sw_enabled(apic))
-			break;
-	}
-
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
 		u16 cid, lid;
@@ -198,11 +178,25 @@
 
 		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
-		cid = apic_cluster_id(new, ldr);
-		lid = apic_logical_id(new, ldr);
 
 		if (aid < ARRAY_SIZE(new->phys_map))
 			new->phys_map[aid] = apic;
+
+		if (apic_x2apic_mode(apic)) {
+			new->mode |= KVM_APIC_MODE_X2APIC;
+		} else if (ldr) {
+			ldr = GET_APIC_LOGICAL_ID(ldr);
+			if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+			else
+				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+		}
+
+		if (!kvm_apic_logical_map_valid(new))
+			continue;
+
+		apic_logical_id(new, ldr, &cid, &lid);
+
 		if (lid && cid < ARRAY_SIZE(new->logical_map))
 			new->logical_map[cid][ffs(lid) - 1] = apic;
 	}
@@ -588,15 +582,23 @@
 	apic_update_ppr(apic);
 }
 
-static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 {
-	return dest == (apic_x2apic_mode(apic) ?
-			X2APIC_BROADCAST : APIC_BROADCAST);
+	if (apic_x2apic_mode(apic))
+		return mda == X2APIC_BROADCAST;
+
+	return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 }
 
-static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 {
-	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+	if (kvm_apic_broadcast(apic, mda))
+		return true;
+
+	if (apic_x2apic_mode(apic))
+		return mda == kvm_apic_id(apic);
+
+	return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 }
 
 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -613,6 +615,7 @@
 		       && (logical_id & mda & 0xffff) != 0;
 
 	logical_id = GET_APIC_LOGICAL_ID(logical_id);
+	mda = GET_APIC_DEST_FIELD(mda);
 
 	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
@@ -627,10 +630,27 @@
 	}
 }
 
+/* KVM APIC implementation has two quirks
+ *  - dest always begins at 0 while xAPIC MDA has offset 24,
+ *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+ */
+static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
+                                              struct kvm_lapic *target)
+{
+	bool ipi = source != NULL;
+	bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
+
+	if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+		return X2APIC_BROADCAST;
+
+	return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+}
+
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, unsigned int dest, int dest_mode)
 {
 	struct kvm_lapic *target = vcpu->arch.apic;
+	u32 mda = kvm_apic_mda(dest, source, target);
 
 	apic_debug("target %p, source %p, dest 0x%x, "
 		   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -640,9 +660,9 @@
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
 		if (dest_mode == APIC_DEST_PHYSICAL)
-			return kvm_apic_match_physical_addr(target, dest);
+			return kvm_apic_match_physical_addr(target, mda);
 		else
-			return kvm_apic_match_logical_addr(target, dest);
+			return kvm_apic_match_logical_addr(target, mda);
 	case APIC_DEST_SELF:
 		return target == source;
 	case APIC_DEST_ALLINC:
@@ -664,6 +684,7 @@
 	struct kvm_lapic **dst;
 	int i;
 	bool ret = false;
+	bool x2apic_ipi = src && apic_x2apic_mode(src);
 
 	*r = -1;
 
@@ -675,15 +696,15 @@
 	if (irq->shorthand)
 		return false;
 
+	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+		return false;
+
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
 	if (!map)
 		goto out;
 
-	if (irq->dest_id == map->broadcast)
-		goto out;
-
 	ret = true;
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -692,16 +713,20 @@
 
 		dst = &map->phys_map[irq->dest_id];
 	} else {
-		u32 mda = irq->dest_id << (32 - map->ldr_bits);
-		u16 cid = apic_cluster_id(map, mda);
+		u16 cid;
+
+		if (!kvm_apic_logical_map_valid(map)) {
+			ret = false;
+			goto out;
+		}
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
 
 		if (cid >= ARRAY_SIZE(map->logical_map))
 			goto out;
 
 		dst = map->logical_map[cid];
 
-		bitmap = apic_logical_id(map, mda);
-
 		if (irq->delivery_mode == APIC_DM_LOWEST) {
 			int l = -1;
 			for_each_set_bit(i, &bitmap, 16) {
@@ -1037,7 +1062,7 @@
 	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
 
-static int apic_mmio_read(struct kvm_io_device *this,
+static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 			   gpa_t address, int len, void *data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
@@ -1357,7 +1382,7 @@
 	return ret;
 }
 
-static int apic_mmio_write(struct kvm_io_device *this,
+static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
@@ -1497,8 +1522,6 @@
 		return;
 	}
 
-	if (!kvm_vcpu_is_bsp(apic->vcpu))
-		value &= ~MSR_IA32_APICBASE_BSP;
 	vcpu->arch.apic_base = value;
 
 	/* update jump label if enable bit changes */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0bc6c65..9d28383 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,7 +1,7 @@
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 
@@ -148,21 +148,6 @@
 	return kvm_x86_ops->vm_has_apicv(kvm);
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
-{
-	u16 cid;
-	ldr >>= 32 - map->ldr_bits;
-	cid = (ldr >> map->cid_shift) & map->cid_mask;
-
-	return cid;
-}
-
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-	ldr >>= (32 - map->ldr_bits);
-	return ldr & map->lid_mask;
-}
-
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.apic->pending_events;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cee7592..146f295 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4465,6 +4465,79 @@
 		kvm_flush_remote_tlbs(kvm);
 }
 
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+		unsigned long *rmapp)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	int need_tlb_flush = 0;
+	pfn_t pfn;
+	struct kvm_mmu_page *sp;
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		sp = page_header(__pa(sptep));
+		pfn = spte_to_pfn(*sptep);
+
+		/*
+		 * Only EPT supported for now; otherwise, one would need to
+		 * find out efficiently whether the guest page tables are
+		 * also using huge pages.
+		 */
+		if (sp->role.direct &&
+			!kvm_is_reserved_pfn(pfn) &&
+			PageTransCompound(pfn_to_page(pfn))) {
+			drop_spte(kvm, sptep);
+			sptep = rmap_get_first(*rmapp, &iter);
+			need_tlb_flush = 1;
+		} else
+			sptep = rmap_get_next(&iter);
+	}
+
+	return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+			struct kvm_memory_slot *memslot)
+{
+	bool flush = false;
+	unsigned long *rmapp;
+	unsigned long last_index, index;
+	gfn_t gfn_start, gfn_end;
+
+	spin_lock(&kvm->mmu_lock);
+
+	gfn_start = memslot->base_gfn;
+	gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+	if (gfn_start >= gfn_end)
+		goto out;
+
+	rmapp = memslot->arch.rmap[0];
+	last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+					PT_PAGE_TABLE_LEVEL);
+
+	for (index = 0; index <= last_index; ++index, ++rmapp) {
+		if (*rmapp)
+			flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			if (flush) {
+				kvm_flush_remote_tlbs(kvm);
+				flush = false;
+			}
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot)
 {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d8..29fbf9d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -38,7 +38,7 @@
 };
 
 /* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {1, 0, 7};
 
 static bool pmc_is_gp(struct kvm_pmc *pmc)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cc618c8..ce741b8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1261,7 +1261,7 @@
 
 	svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
 				   MSR_IA32_APICBASE_ENABLE;
-	if (kvm_vcpu_is_bsp(&svm->vcpu))
+	if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
 	svm_init_osvw(&svm->vcpu);
@@ -1929,14 +1929,12 @@
 static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
 	kvm_emulate_hypercall(&svm->vcpu);
 	return 1;
 }
@@ -2757,11 +2755,11 @@
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-			  vcpu->arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+			  kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+	kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -2770,12 +2768,18 @@
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+	kvm_emulate_wbinvd(&svm->vcpu);
+	return 1;
+}
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
 	u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
@@ -2902,7 +2906,8 @@
 	return 1;
 }
 
-bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+					    unsigned long val)
 {
 	unsigned long cr0 = svm->vcpu.arch.cr0;
 	bool ret = false;
@@ -2940,7 +2945,10 @@
 		return emulate_on_interception(svm);
 
 	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+	if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+		cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+	else
+		cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
 	err = 0;
 	if (cr >= 16) { /* mov to cr */
@@ -3133,7 +3141,7 @@
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
 	u64 data;
 
 	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3150,8 @@
 	} else {
 		trace_kvm_msr_read(ecx, data);
 
-		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 		skip_emulated_instruction(&svm->vcpu);
 	}
@@ -3246,9 +3254,8 @@
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	struct msr_data msr;
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+	u64 data = kvm_read_edx_eax(&svm->vcpu);
 
 	msr.data = data;
 	msr.index = ecx;
@@ -3325,7 +3332,7 @@
 	[SVM_EXIT_READ_CR3]			= cr_interception,
 	[SVM_EXIT_READ_CR4]			= cr_interception,
 	[SVM_EXIT_READ_CR8]			= cr_interception,
-	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
+	[SVM_EXIT_CR0_SEL_WRITE]		= cr_interception,
 	[SVM_EXIT_WRITE_CR0]			= cr_interception,
 	[SVM_EXIT_WRITE_CR3]			= cr_interception,
 	[SVM_EXIT_WRITE_CR4]			= cr_interception,
@@ -3376,7 +3383,7 @@
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
-	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
+	[SVM_EXIT_WBINVD]                       = wbinvd_interception,
 	[SVM_EXIT_MONITOR]			= monitor_interception,
 	[SVM_EXIT_MWAIT]			= mwait_interception,
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
@@ -3555,7 +3562,7 @@
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ae4f6d3..f5e8dce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2470,6 +2470,7 @@
 	vmx->nested.nested_vmx_secondary_ctls_low = 0;
 	vmx->nested.nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -3268,8 +3269,8 @@
 		 * default value.
 		 */
 		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
-			save->selector &= ~SELECTOR_RPL_MASK;
-		save->dpl = save->selector & SELECTOR_RPL_MASK;
+			save->selector &= ~SEGMENT_RPL_MASK;
+		save->dpl = save->selector & SEGMENT_RPL_MASK;
 		save->s = 1;
 	}
 	vmx_set_segment(vcpu, save, seg);
@@ -3842,7 +3843,7 @@
 	unsigned int cs_rpl;
 
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+	cs_rpl = cs.selector & SEGMENT_RPL_MASK;
 
 	if (cs.unusable)
 		return false;
@@ -3870,7 +3871,7 @@
 	unsigned int ss_rpl;
 
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-	ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+	ss_rpl = ss.selector & SEGMENT_RPL_MASK;
 
 	if (ss.unusable)
 		return true;
@@ -3892,7 +3893,7 @@
 	unsigned int rpl;
 
 	vmx_get_segment(vcpu, &var, seg);
-	rpl = var.selector & SELECTOR_RPL_MASK;
+	rpl = var.selector & SEGMENT_RPL_MASK;
 
 	if (var.unusable)
 		return true;
@@ -3919,7 +3920,7 @@
 
 	if (tr.unusable)
 		return false;
-	if (tr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
+	if (tr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
 		return false;
 	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
 		return false;
@@ -3937,7 +3938,7 @@
 
 	if (ldtr.unusable)
 		return true;
-	if (ldtr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
+	if (ldtr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
 		return false;
 	if (ldtr.type != 2)
 		return false;
@@ -3954,8 +3955,8 @@
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 
-	return ((cs.selector & SELECTOR_RPL_MASK) ==
-		 (ss.selector & SELECTOR_RPL_MASK));
+	return ((cs.selector & SEGMENT_RPL_MASK) ==
+		 (ss.selector & SEGMENT_RPL_MASK));
 }
 
 /*
@@ -4711,7 +4712,7 @@
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
+	if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
 		apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
 	apic_base_msr.host_initiated = true;
 	kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
@@ -5006,7 +5007,7 @@
 		if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
 			if (vcpu->arch.halt_request) {
 				vcpu->arch.halt_request = 0;
-				return kvm_emulate_halt(vcpu);
+				return kvm_vcpu_halt(vcpu);
 			}
 			return 1;
 		}
@@ -5071,6 +5072,10 @@
 	}
 
 	if (is_invalid_opcode(intr_info)) {
+		if (is_guest_mode(vcpu)) {
+			kvm_queue_exception(vcpu, UD_VECTOR);
+			return 1;
+		}
 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
 		if (er != EMULATE_DONE)
 			kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5090,9 +5095,10 @@
 	    !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
-		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.ndata = 3;
 		vcpu->run->internal.data[0] = vect_info;
 		vcpu->run->internal.data[1] = intr_info;
+		vcpu->run->internal.data[2] = error_code;
 		return 0;
 	}
 
@@ -5533,13 +5539,11 @@
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	kvm_emulate_hypercall(vcpu);
 	return 1;
 }
@@ -5570,7 +5574,6 @@
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
 	kvm_emulate_wbinvd(vcpu);
 	return 1;
 }
@@ -5828,7 +5831,7 @@
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-	if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		skip_emulated_instruction(vcpu);
 		return 1;
 	}
@@ -5909,7 +5912,7 @@
 
 		if (vcpu->arch.halt_request) {
 			vcpu->arch.halt_request = 0;
-			ret = kvm_emulate_halt(vcpu);
+			ret = kvm_vcpu_halt(vcpu);
 			goto out;
 		}
 
@@ -7318,21 +7321,21 @@
 		else if (port < 0x10000)
 			bitmap = vmcs12->io_bitmap_b;
 		else
-			return 1;
+			return true;
 		bitmap += (port & 0x7fff) / 8;
 
 		if (last_bitmap != bitmap)
 			if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
-				return 1;
+				return true;
 		if (b & (1 << (port & 7)))
-			return 1;
+			return true;
 
 		port++;
 		size--;
 		last_bitmap = bitmap;
 	}
 
-	return 0;
+	return false;
 }
 
 /*
@@ -7348,7 +7351,7 @@
 	gpa_t bitmap;
 
 	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
-		return 1;
+		return true;
 
 	/*
 	 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
@@ -7367,10 +7370,10 @@
 	if (msr_index < 1024*8) {
 		unsigned char b;
 		if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
-			return 1;
+			return true;
 		return 1 & (b >> (msr_index & 7));
 	} else
-		return 1; /* let L1 handle the wrong parameter */
+		return true; /* let L1 handle the wrong parameter */
 }
 
 /*
@@ -7392,7 +7395,7 @@
 		case 0:
 			if (vmcs12->cr0_guest_host_mask &
 			    (val ^ vmcs12->cr0_read_shadow))
-				return 1;
+				return true;
 			break;
 		case 3:
 			if ((vmcs12->cr3_target_count >= 1 &&
@@ -7403,37 +7406,37 @@
 					vmcs12->cr3_target_value2 == val) ||
 				(vmcs12->cr3_target_count >= 4 &&
 					vmcs12->cr3_target_value3 == val))
-				return 0;
+				return false;
 			if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
-				return 1;
+				return true;
 			break;
 		case 4:
 			if (vmcs12->cr4_guest_host_mask &
 			    (vmcs12->cr4_read_shadow ^ val))
-				return 1;
+				return true;
 			break;
 		case 8:
 			if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
-				return 1;
+				return true;
 			break;
 		}
 		break;
 	case 2: /* clts */
 		if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
 		    (vmcs12->cr0_read_shadow & X86_CR0_TS))
-			return 1;
+			return true;
 		break;
 	case 1: /* mov from cr */
 		switch (cr) {
 		case 3:
 			if (vmcs12->cpu_based_vm_exec_control &
 			    CPU_BASED_CR3_STORE_EXITING)
-				return 1;
+				return true;
 			break;
 		case 8:
 			if (vmcs12->cpu_based_vm_exec_control &
 			    CPU_BASED_CR8_STORE_EXITING)
-				return 1;
+				return true;
 			break;
 		}
 		break;
@@ -7444,14 +7447,14 @@
 		 */
 		if (vmcs12->cr0_guest_host_mask & 0xe &
 		    (val ^ vmcs12->cr0_read_shadow))
-			return 1;
+			return true;
 		if ((vmcs12->cr0_guest_host_mask & 0x1) &&
 		    !(vmcs12->cr0_read_shadow & 0x1) &&
 		    (val & 0x1))
-			return 1;
+			return true;
 		break;
 	}
-	return 0;
+	return false;
 }
 
 /*
@@ -7474,48 +7477,48 @@
 				KVM_ISA_VMX);
 
 	if (vmx->nested.nested_run_pending)
-		return 0;
+		return false;
 
 	if (unlikely(vmx->fail)) {
 		pr_info_ratelimited("%s failed vm entry %x\n", __func__,
 				    vmcs_read32(VM_INSTRUCTION_ERROR));
-		return 1;
+		return true;
 	}
 
 	switch (exit_reason) {
 	case EXIT_REASON_EXCEPTION_NMI:
 		if (!is_exception(intr_info))
-			return 0;
+			return false;
 		else if (is_page_fault(intr_info))
 			return enable_ept;
 		else if (is_no_device(intr_info) &&
 			 !(vmcs12->guest_cr0 & X86_CR0_TS))
-			return 0;
+			return false;
 		return vmcs12->exception_bitmap &
 				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
 	case EXIT_REASON_EXTERNAL_INTERRUPT:
-		return 0;
+		return false;
 	case EXIT_REASON_TRIPLE_FAULT:
-		return 1;
+		return true;
 	case EXIT_REASON_PENDING_INTERRUPT:
 		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
 	case EXIT_REASON_NMI_WINDOW:
 		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
 	case EXIT_REASON_TASK_SWITCH:
-		return 1;
+		return true;
 	case EXIT_REASON_CPUID:
 		if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
-			return 0;
-		return 1;
+			return false;
+		return true;
 	case EXIT_REASON_HLT:
 		return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
 	case EXIT_REASON_INVD:
-		return 1;
+		return true;
 	case EXIT_REASON_INVLPG:
 		return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
 	case EXIT_REASON_RDPMC:
 		return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-	case EXIT_REASON_RDTSC:
+	case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
 		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
 	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
 	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -7527,7 +7530,7 @@
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
 		 */
-		return 1;
+		return true;
 	case EXIT_REASON_CR_ACCESS:
 		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
 	case EXIT_REASON_DR_ACCESS:
@@ -7538,7 +7541,7 @@
 	case EXIT_REASON_MSR_WRITE:
 		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
 	case EXIT_REASON_INVALID_STATE:
-		return 1;
+		return true;
 	case EXIT_REASON_MWAIT_INSTRUCTION:
 		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
 	case EXIT_REASON_MONITOR_INSTRUCTION:
@@ -7548,7 +7551,7 @@
 			nested_cpu_has2(vmcs12,
 				SECONDARY_EXEC_PAUSE_LOOP_EXITING);
 	case EXIT_REASON_MCE_DURING_VMENTRY:
-		return 0;
+		return false;
 	case EXIT_REASON_TPR_BELOW_THRESHOLD:
 		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
 	case EXIT_REASON_APIC_ACCESS:
@@ -7557,7 +7560,7 @@
 	case EXIT_REASON_APIC_WRITE:
 	case EXIT_REASON_EOI_INDUCED:
 		/* apic_write and eoi_induced should exit unconditionally. */
-		return 1;
+		return true;
 	case EXIT_REASON_EPT_VIOLATION:
 		/*
 		 * L0 always deals with the EPT violation. If nested EPT is
@@ -7565,7 +7568,7 @@
 		 * missing in the guest EPT table (EPT12), the EPT violation
 		 * will be injected with nested_ept_inject_page_fault()
 		 */
-		return 0;
+		return false;
 	case EXIT_REASON_EPT_MISCONFIG:
 		/*
 		 * L2 never uses directly L1's EPT, but rather L0's own EPT
@@ -7573,11 +7576,11 @@
 		 * (EPT on EPT). So any problems with the structure of the
 		 * table is L0's fault.
 		 */
-		return 0;
+		return false;
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
-		return 1;
+		return true;
 	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
 		/*
 		 * This should never happen, since it is not possible to
@@ -7587,7 +7590,7 @@
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
-		return 1;
+		return true;
 	}
 }
 
@@ -8522,6 +8525,9 @@
 						exec_control);
 			}
 		}
+		if (nested && !vmx->rdtscp_enabled)
+			vmx->nested.nested_vmx_secondary_ctls_high &=
+				~SECONDARY_EXEC_RDTSCP;
 	}
 
 	/* Exposing INVPCID only when PCID is exposed */
@@ -8622,10 +8628,11 @@
 					struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-		/* TODO: Also verify bits beyond physical address width are 0 */
-		if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+		if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
+		    vmcs12->apic_access_addr >> maxphyaddr)
 			return false;
 
 		/*
@@ -8641,8 +8648,8 @@
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		/* TODO: Also verify bits beyond physical address width are 0 */
-		if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+		if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
+		    vmcs12->virtual_apic_page_addr >> maxphyaddr)
 			return false;
 
 		if (vmx->nested.virtual_apic_page) /* shouldn't happen */
@@ -8665,7 +8672,8 @@
 	}
 
 	if (nested_cpu_has_posted_intr(vmcs12)) {
-		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
+		    vmcs12->posted_intr_desc_addr >> maxphyaddr)
 			return false;
 
 		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
@@ -8864,9 +8872,9 @@
 
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 				       unsigned long count_field,
-				       unsigned long addr_field,
-				       int maxphyaddr)
+				       unsigned long addr_field)
 {
+	int maxphyaddr;
 	u64 count, addr;
 
 	if (vmcs12_read_any(vcpu, count_field, &count) ||
@@ -8876,6 +8884,7 @@
 	}
 	if (count == 0)
 		return 0;
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
 	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
 	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
 		pr_warn_ratelimited(
@@ -8889,19 +8898,16 @@
 static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
 						struct vmcs12 *vmcs12)
 {
-	int maxphyaddr;
-
 	if (vmcs12->vm_exit_msr_load_count == 0 &&
 	    vmcs12->vm_exit_msr_store_count == 0 &&
 	    vmcs12->vm_entry_msr_load_count == 0)
 		return 0; /* Fast path */
-	maxphyaddr = cpuid_maxphyaddr(vcpu);
 	if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
-					VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+					VM_EXIT_MSR_LOAD_ADDR) ||
 	    nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
-					VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+					VM_EXIT_MSR_STORE_ADDR) ||
 	    nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
-					VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+					VM_ENTRY_MSR_LOAD_ADDR))
 		return -EINVAL;
 	return 0;
 }
@@ -9151,8 +9157,9 @@
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9385,7 +9392,6 @@
 	}
 
 	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -9524,7 +9530,7 @@
 	vmcs12->launch_state = 1;
 
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-		return kvm_emulate_halt(vcpu);
+		return kvm_vcpu_halt(vcpu);
 
 	vmx->nested.nested_run_pending = 1;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19e..e1a8126 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -801,6 +801,17 @@
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+		for (i = 0; i < KVM_NR_DB_REGS; i++)
+			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+	}
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
 	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -1070,19 +1081,19 @@
 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
 	u64 boot_ns;
 
-	boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
 
 	write_seqcount_begin(&vdata->seq);
 
 	/* copy pvclock gtod data */
-	vdata->clock.vclock_mode	= tk->tkr.clock->archdata.vclock_mode;
-	vdata->clock.cycle_last		= tk->tkr.cycle_last;
-	vdata->clock.mask		= tk->tkr.mask;
-	vdata->clock.mult		= tk->tkr.mult;
-	vdata->clock.shift		= tk->tkr.shift;
+	vdata->clock.vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->tkr_mono.cycle_last;
+	vdata->clock.mask		= tk->tkr_mono.mask;
+	vdata->clock.mult		= tk->tkr_mono.mult;
+	vdata->clock.shift		= tk->tkr_mono.shift;
 
 	vdata->boot_ns			= boot_ns;
-	vdata->nsec_base		= tk->tkr.xtime_nsec;
+	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
 
 	write_seqcount_end(&vdata->seq);
 }
@@ -3149,6 +3160,7 @@
 		return -EINVAL;
 
 	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+	kvm_update_dr0123(vcpu);
 	vcpu->arch.dr6 = dbgregs->dr6;
 	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = dbgregs->dr7;
@@ -4114,8 +4126,8 @@
 	do {
 		n = min(len, 8);
 		if (!(vcpu->arch.apic &&
-		      !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-		    && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+		      !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
 			break;
 		handled += n;
 		addr += n;
@@ -4134,8 +4146,9 @@
 	do {
 		n = min(len, 8);
 		if (!(vcpu->arch.apic &&
-		      !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-		    && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+		      !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+					 addr, n, v))
+		    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
 			break;
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
 		handled += n;
@@ -4475,7 +4488,8 @@
 	return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+			unsigned long addr,
 			void *val, unsigned int bytes,
 			struct x86_exception *exception,
 			const struct read_write_emulator_ops *ops)
@@ -4538,7 +4552,7 @@
 				   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
 			    unsigned long addr,
 			    const void *val,
 			    unsigned int bytes,
@@ -4629,10 +4643,10 @@
 	int r;
 
 	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+		r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
 				    vcpu->arch.pio.size, pd);
 	else
-		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+		r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
 				     vcpu->arch.pio.port, vcpu->arch.pio.size,
 				     pd);
 	return r;
@@ -4705,7 +4719,7 @@
 	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
 	if (!need_emulate_wbinvd(vcpu))
 		return X86EMUL_CONTINUE;
@@ -4722,19 +4736,29 @@
 		wbinvd();
 	return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-	kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+	kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+			   unsigned long *dest)
 {
 	return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+			   unsigned long value)
 {
 
 	return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5816,7 +5840,7 @@
 	free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
 	if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5827,6 +5851,13 @@
 		return 0;
 	}
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5903,7 +5934,7 @@
 	lapic_irq.dest_id = apicid;
 
 	lapic_irq.delivery_mode = APIC_DM_REMRD;
-	kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5911,6 +5942,8 @@
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int op_64_bit, r = 1;
 
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+
 	if (kvm_hv_hypercall_enabled(vcpu->kvm))
 		return kvm_hv_hypercall(vcpu);
 
@@ -6164,7 +6197,7 @@
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
  * userspace.
  */
@@ -6301,6 +6334,7 @@
 		set_debugreg(vcpu->arch.eff_db[2], 2);
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 		set_debugreg(vcpu->arch.dr6, 6);
+		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
@@ -6382,42 +6416,47 @@
 	return r;
 }
 
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	if (!kvm_arch_vcpu_runnable(vcpu)) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		kvm_vcpu_block(vcpu);
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+			return 1;
+	}
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+	kvm_apic_accept_events(vcpu);
+	switch(vcpu->arch.mp_state) {
+	case KVM_MP_STATE_HALTED:
+		vcpu->arch.pv.pv_unhalted = false;
+		vcpu->arch.mp_state =
+			KVM_MP_STATE_RUNNABLE;
+	case KVM_MP_STATE_RUNNABLE:
+		vcpu->arch.apf.halted = false;
+		break;
+	case KVM_MP_STATE_INIT_RECEIVED:
+		break;
+	default:
+		return -EINTR;
+		break;
+	}
+	return 1;
+}
+
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
 	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-	r = 1;
-	while (r > 0) {
+	for (;;) {
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		    !vcpu->arch.apf.halted)
 			r = vcpu_enter_guest(vcpu);
-		else {
-			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-			kvm_vcpu_block(vcpu);
-			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-				kvm_apic_accept_events(vcpu);
-				switch(vcpu->arch.mp_state) {
-				case KVM_MP_STATE_HALTED:
-					vcpu->arch.pv.pv_unhalted = false;
-					vcpu->arch.mp_state =
-						KVM_MP_STATE_RUNNABLE;
-				case KVM_MP_STATE_RUNNABLE:
-					vcpu->arch.apf.halted = false;
-					break;
-				case KVM_MP_STATE_INIT_RECEIVED:
-					break;
-				default:
-					r = -EINTR;
-					break;
-				}
-			}
-		}
-
+		else
+			r = vcpu_block(kvm, vcpu);
 		if (r <= 0)
 			break;
 
@@ -6429,6 +6468,7 @@
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.request_irq_exits;
+			break;
 		}
 
 		kvm_check_async_pf_completion(vcpu);
@@ -6437,6 +6477,7 @@
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.signal_exits;
+			break;
 		}
 		if (need_resched()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6568,7 +6609,7 @@
 	} else
 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-	r = __vcpu_run(vcpu);
+	r = vcpu_run(vcpu);
 
 out:
 	post_kvm_run_save(vcpu);
@@ -7075,11 +7116,14 @@
 	kvm_clear_exception_queue(vcpu);
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+	kvm_update_dr0123(vcpu);
 	vcpu->arch.dr6 = DR6_INIT;
 	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = DR7_FIXED_1;
 	kvm_update_dr7(vcpu);
 
+	vcpu->arch.cr2 = 0;
+
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	vcpu->arch.apf.msr_val = 0;
 	vcpu->arch.st.msr_val = 0;
@@ -7240,7 +7284,7 @@
 
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7288,6 +7332,8 @@
 	vcpu->arch.guest_supported_xcr0 = 0;
 	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
+	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
@@ -7428,7 +7474,7 @@
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-			kvm_kvfree(free->arch.rmap[i]);
+			kvfree(free->arch.rmap[i]);
 			free->arch.rmap[i] = NULL;
 		}
 		if (i == 0)
@@ -7436,7 +7482,7 @@
 
 		if (!dont || free->arch.lpage_info[i - 1] !=
 			     dont->arch.lpage_info[i - 1]) {
-			kvm_kvfree(free->arch.lpage_info[i - 1]);
+			kvfree(free->arch.lpage_info[i - 1]);
 			free->arch.lpage_info[i - 1] = NULL;
 		}
 	}
@@ -7490,12 +7536,12 @@
 
 out_free:
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-		kvm_kvfree(slot->arch.rmap[i]);
+		kvfree(slot->arch.rmap[i]);
 		slot->arch.rmap[i] = NULL;
 		if (i == 0)
 			continue;
 
-		kvm_kvfree(slot->arch.lpage_info[i - 1]);
+		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
 	return -ENOMEM;
@@ -7618,6 +7664,23 @@
 	new = id_to_memslot(kvm->memslots, mem->slot);
 
 	/*
+	 * Dirty logging tracks sptes in 4k granularity, meaning that large
+	 * sptes have to be split.  If live migration is successful, the guest
+	 * in the source machine will be destroyed and large sptes will be
+	 * created in the destination. However, if the guest continues to run
+	 * in the source machine (for example if live migration fails), small
+	 * sptes will remain around and cause bad performance.
+	 *
+	 * Scan sptes if dirty logging has been stopped, dropping those
+	 * which can be collapsed into a single large-page spte.  Later
+	 * page faults will create the large-page sptes.
+	 */
+	if ((change != KVM_MR_DELETE) &&
+		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_zap_collapsible_sptes(kvm, new);
+
+	/*
 	 * Set up write protection and/or dirty logging for the new slot.
 	 *
 	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ac4453d..717908b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -868,7 +868,8 @@
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+			set_intr_gate(i, irq_entries_start +
+					8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 
 	/*
@@ -1076,6 +1077,7 @@
 {
 	lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
 		   THREAD_SIZE / PAGE_SIZE);
+	tss->x86_tss.sp0 = thread->sp0;
 }
 
 /* Let's just say, I wouldn't do debugging under a Guest. */
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb..082a851 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
 	movl %ebx, %eax
 	movl %ecx, %edx
@@ -67,10 +57,10 @@
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 
 	movl %eax, %esi
 	movl %edx, %edi
@@ -89,10 +79,10 @@
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -119,7 +109,7 @@
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -146,18 +136,18 @@
 2:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE ecx
+	pushl_cfi_reg edi
+	pushl_cfi_reg ecx
 
 	movl %eax, %ebp
 	movl %edx, %edi
@@ -179,8 +169,8 @@
 3:
 	addl $8, %esp
 	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@
 
 ENTRY(atomic64_inc_not_zero_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -209,7 +199,7 @@
 
 	movl $1, %eax
 3:
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8ee..9bc944a 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@
 	   */		
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
@@ -127,14 +125,12 @@
 6:	addl %ecx,%eax
 	adcl $0, %eax 
 7:	
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 8f
 	roll $8, %eax
 8:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@
 
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
@@ -251,14 +245,12 @@
 	addl %ebx,%eax
 	adcl $0,%eax
 80: 
-	testl $1, 12(%esp)
+	testb $1, 12(%esp)
 	jz 90f
 	roll $8, %eax
 90: 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@
 	CFI_STARTPROC
 	subl  $4,%esp	
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl ARGBASE+16(%esp),%eax	# sum
 	movl ARGBASE+12(%esp),%ecx	# len
 	movl ARGBASE+4(%esp),%esi	# src
@@ -412,12 +401,9 @@
 
 .previous
 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret	
 	CFI_ENDPROC
@@ -441,12 +427,9 @@
 		
 ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
+	pushl_cfi_reg ebx
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
 	movl ARGBASE+4(%esp),%esi	#src
 	movl ARGBASE+8(%esp),%edi	#dst	
 	movl ARGBASE+12(%esp),%ecx	#len
@@ -506,12 +489,9 @@
 	jmp  7b			
 .previous				
 
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebx
-	CFI_RESTORE ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cf..e67e579 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
 /*
- * Zero a page. 	
- * rdi	page
- */			
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi	- page
+ */
+ENTRY(clear_page)
 	CFI_STARTPROC
+
+	ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp clear_page_c_e", X86_FEATURE_ERMS
+
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
 	ret
 	CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
 
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
 	CFI_STARTPROC
-	movl $4096,%ecx
-	xorl %eax,%eax
-	rep stosb
-	ret
-	CFI_ENDPROC
-ENDPROC(clear_page_c_e)
 
-ENTRY(clear_page)
-	CFI_STARTPROC
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -45,29 +49,13 @@
 	nop
 	ret
 	CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
+ENDPROC(clear_page_orig)
 
-	/*
-	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
-	 * It is recommended to use this when possible.
-	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
-	 * Otherwise, use original function.
-	 *
-	 */
-
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
-3:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-			     .Lclear_page_end-clear_page, 2b-1b
-	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
-			     .Lclear_page_end-clear_page,3b-2b
-	.previous
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca6..8239dbc 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
 	ALIGN
-copy_page_rep:
+ENTRY(copy_page)
 	CFI_STARTPROC
+	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_rep)
+ENDPROC(copy_page)
 
-/*
- *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- *  Could vary the prefetch distance based on SMP/UP.
-*/
-
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
 	CFI_STARTPROC
 	subq	$2*8,	%rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@
 	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
-.Lcopy_page_end:
 	CFI_ENDPROC
-ENDPROC(copy_page)
-
-	/* Some CPUs run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
-2:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
-		.Lcopy_page_end-copy_page, 2b-1b
-	.previous
+ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d..fa997df 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
@@ -19,33 +16,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-	.byte 0xe9	/* 32bit jump */
-	.long \orig-1f	/* by default jump to orig */
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
-3:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry 0b,2b,\feature1,5,5
-	altinstruction_entry 0b,3b,\feature2,5,5
-	.previous
-	.endm
-
 	.macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
 	andl $7,%ecx
@@ -67,7 +38,6 @@
 
 	_ASM_EXTABLE(100b,103b)
 	_ASM_EXTABLE(101b,103b)
-#endif
 	.endm
 
 /* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		      "jmp copy_user_generic_string",		\
+		      X86_FEATURE_REP_GOOD,			\
+		      "jmp copy_user_enhanced_fast_string",	\
+		      X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -94,9 +66,11 @@
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
 	ja bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
+	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
+		      "jmp copy_user_generic_string",		\
+		      X86_FEATURE_REP_GOOD,			\
+		      "jmp copy_user_enhanced_fast_string",	\
+		      X86_FEATURE_ERMS
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 2419d5f..9734182 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -196,7 +196,7 @@
 
 	/* handle last odd byte */
 .Lhandle_1:
-	testl $1, %r10d
+	testb $1, %r10b
 	jz    .Lende
 	xorl  %ebx, %ebx
 	source
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1313ae6..8f72b33 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -52,6 +52,13 @@
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 {
+	/*
+	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+	 * even if the input buffer is long enough to hold them.
+	 */
+	if (buf_len > MAX_INSN_SIZE)
+		buf_len = MAX_INSN_SIZE;
+
 	memset(insn, 0, sizeof(*insn));
 	insn->kaddr = kaddr;
 	insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@
 				/* VEX.W overrides opnd_size */
 				insn->opnd_bytes = 8;
 		} else {
+			/*
+			 * For VEX2, fake VEX3-like byte#2.
+			 * Makes it easier to decode vex.W, vex.vvvv,
+			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+			 */
+			insn->vex_prefix.bytes[2] = b2 & 0x7f;
 			insn->vex_prefix.nbytes = 2;
 			insn->next_byte += 2;
 		}
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9..b046664 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
 /*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
+/*
  * memcpy - Copy a memory block.
  *
  * Input:
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
+ENDPROC(memcpy_erms)
 
-.weak memcpy
-
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
@@ -183,26 +179,4 @@
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-	/*
-	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
-	 * If the feature is supported, memcpy_c_e() is the first choice.
-	 * If enhanced rep movsb copy is not available, use fast string copy
-	 * memcpy_c() when possible. This is faster and code is simpler than
-	 * original memcpy().
-	 * Otherwise, original memcpy() is used.
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-	 *
-	 * Replace only beginning, memcpy is used to apply alternatives,
-	 * so it is silly to overwrite itself with nops - reboot is the
-	 * only outcome...
-	 */
-	.section .altinstructions, "a"
-	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
-	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
-	.previous
+ENDPROC(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530..0f8a0d0 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
  * This assembly file is re-written from memmove_64.c file.
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
-#define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
@@ -44,6 +43,8 @@
 	jg 2f
 
 .Lmemmove_begin_forward:
+	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -207,21 +208,5 @@
 13:
 	retq
 	CFI_ENDPROC
-
-	.section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
-	/* Forward moving data. */
-	movq %rdx, %rcx
-	rep movsb
-	retq
-.Lmemmove_end_forward_efs:
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry .Lmemmove_begin_forward,		\
-		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\
-		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\
-		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
-	.previous
 ENDPROC(__memmove)
 ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935..93118fb 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the orignal function as well.
- *	
+ *
  * rdi   destination
- * rsi   value (char) 
- * rdx   count (bytes) 
- * 
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
  * rax   original destination
- */	
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+	 * to use it when possible. If not available, use fast string instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 */
+	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memset_erms", X86_FEATURE_ERMS
+
 	movq %rdi,%r9
 	movq %rdx,%rcx
 	andl $7,%edx
@@ -31,8 +42,8 @@
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e:
-	.previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
  *
  * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
 	ret
-.Lmemset_e_e:
-	.previous
+ENDPROC(memset_erms)
 
-.weak memset
-
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
 	CFI_STARTPROC
 	movq %rdi,%r10
 
@@ -134,23 +140,4 @@
 	jmp .Lafter_bad_alignment
 .Lfinal:
 	CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
-	 * It is recommended to use this when possible.
-	 *
-	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
-	 * instructions.
-	 *
-	 * Otherwise, use original memset function.
-	 *
-	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-	 */
-	.section .altinstructions,"a"
-	altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-			     .Lfinal-__memset,.Lmemset_e-.Lmemset_c
-	altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-			     .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-	.previous
+ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13ee..3ca5218 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushq_cfi %rbx
-	pushq_cfi %rbp
+	pushq_cfi_reg rbx
+	pushq_cfi_reg rbp
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
@@ -35,8 +35,8 @@
 	movl    %ebp, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq_cfi %rbp
-	popq_cfi %rbx
+	popq_cfi_reg rbp
+	popq_cfi_reg rbx
 	ret
 3:
 	CFI_RESTORE_STATE
@@ -53,10 +53,10 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	pushl_cfi %ebp
-	pushl_cfi %esi
-	pushl_cfi %edi
+	pushl_cfi_reg ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 	pushl_cfi $0              /* Return value */
 	pushl_cfi %eax
 	movl    4(%eax), %ecx
@@ -80,10 +80,10 @@
 	movl    %esi, 24(%eax)
 	movl    %edi, 28(%eax)
 	popl_cfi %eax
-	popl_cfi %edi
-	popl_cfi %esi
-	popl_cfi %ebp
-	popl_cfi %ebx
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebp
+	popl_cfi_reg ebx
 	ret
 3:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f0..2322abe 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
  */
 
 #define save_common_regs \
-	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+	pushl_cfi_reg ecx
 
 #define restore_common_regs \
-	popl_cfi %ecx; CFI_RESTORE ecx
+	popl_cfi_reg ecx
 
 	/* Avoid uglifying the argument copying x86-64 needs to do. */
 	.macro movq src, dst
@@ -64,22 +64,22 @@
  */
 
 #define save_common_regs \
-	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-	pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-	pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-	pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi; \
+	pushq_cfi_reg rsi; \
+	pushq_cfi_reg rcx; \
+	pushq_cfi_reg r8;  \
+	pushq_cfi_reg r9;  \
+	pushq_cfi_reg r10; \
+	pushq_cfi_reg r11
 
 #define restore_common_regs \
-	popq_cfi %r11; CFI_RESTORE r11; \
-	popq_cfi %r10; CFI_RESTORE r10; \
-	popq_cfi %r9;  CFI_RESTORE r9; \
-	popq_cfi %r8;  CFI_RESTORE r8; \
-	popq_cfi %rcx; CFI_RESTORE rcx; \
-	popq_cfi %rsi; CFI_RESTORE rsi; \
-	popq_cfi %rdi; CFI_RESTORE rdi
+	popq_cfi_reg r11; \
+	popq_cfi_reg r10; \
+	popq_cfi_reg r9; \
+	popq_cfi_reg r8; \
+	popq_cfi_reg rcx; \
+	popq_cfi_reg rsi; \
+	popq_cfi_reg rdi
 
 #endif
 
@@ -87,12 +87,10 @@
 ENTRY(call_rwsem_down_read_failed)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_down_read_failed
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
@@ -124,12 +122,10 @@
 ENTRY(call_rwsem_downgrade_wake)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_downgrade_wake
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf..5eb7150 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
 	.globl \name
 \name:
 	CFI_STARTPROC
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ecx
+	pushl_cfi_reg edx
 
 	.if \put_ret_addr_in_eax
 	/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
 	.endif
 
 	call \func
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg edx
+	popl_cfi_reg ecx
+	popl_cfi_reg eax
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5eb..f89ba4e9 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,18 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi_reg rdi
+	pushq_cfi_reg rsi
+	pushq_cfi_reg rdx
+	pushq_cfi_reg rcx
+	pushq_cfi_reg rax
+	pushq_cfi_reg r8
+	pushq_cfi_reg r9
+	pushq_cfi_reg r10
+	pushq_cfi_reg r11
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +54,22 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi_reg r11
+	popq_cfi_reg r10
+	popq_cfi_reg r9
+	popq_cfi_reg r8
+	popq_cfi_reg rax
+	popq_cfi_reg rcx
+	popq_cfi_reg rdx
+	popq_cfi_reg rsi
+	popq_cfi_reg rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index c905e89..1f33b3d 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -69,21 +69,20 @@
  * it is not necessary to optimize tail handling.
  */
 __visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+copy_user_handle_tail(char *to, char *from, unsigned len)
 {
-	char c;
-	unsigned zero_len;
-
 	for (; len; --len, to++) {
+		char c;
+
 		if (__get_user_nocheck(c, from++, sizeof(char)))
 			break;
 		if (__put_user_nocheck(c, to, sizeof(char)))
 			break;
 	}
-
-	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
-		if (__put_user_nocheck(c, to++, sizeof(char)))
-			break;
 	clac();
+
+	/* If the destination is a kernel buffer, we always clear the end */
+	if ((unsigned long)to >= TASK_SIZE_MAX)
+		memset(to, 0, len);
 	return len;
 }
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 1a2be7c..816488c 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -273,6 +273,9 @@
 de: ESC
 df: ESC
 # 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
 e0: LOOPNE/LOOPNZ Jb (f64)
 e1: LOOPE/LOOPZ Jb (f64)
 e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@
 e5: IN eAX,Ib
 e6: OUT Ib,AL
 e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
 e8: CALL Jz (f64)
 e9: JMP-near Jz (f64)
 ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
 81: JNO Jz (f64)
 82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@
 GrpTable: Grp5
 0: INC Ev
 1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ede025f..181c53b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -59,7 +59,7 @@
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
-	if (kprobes_built_in() && !user_mode_vm(regs)) {
+	if (kprobes_built_in() && !user_mode(regs)) {
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 14))
 			ret = 1;
@@ -148,7 +148,7 @@
 	instr = (void *)convert_ip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
 		return 0;
 
 	while (instr < max_instr) {
@@ -1035,7 +1035,7 @@
 	if (error_code & PF_USER)
 		return false;
 
-	if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
 		return false;
 
 	return true;
@@ -1140,7 +1140,7 @@
 	 * User-mode registers count as a user access even for any
 	 * potential system fault or CPU buglet:
 	 */
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		local_irq_enable();
 		error_code |= PF_USER;
 		flags |= FAULT_FLAG_USER;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a110efc..1d55318 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,29 +29,33 @@
 
 /*
  * Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ *   Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ *   Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ *   (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
-	[_PAGE_CACHE_MODE_WB]		= 0,
-	[_PAGE_CACHE_MODE_WC]		= _PAGE_PWT,
-	[_PAGE_CACHE_MODE_UC_MINUS]	= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_UC]		= _PAGE_PCD | _PAGE_PWT,
-	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
-	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
+	[_PAGE_CACHE_MODE_WC      ]	= _PAGE_PWT | 0        ,
+	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
+	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
 };
 EXPORT_SYMBOL(__cachemode2pte_tbl);
+
 uint8_t __pte2cachemode_tbl[8] = {
-	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
-	[__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
-	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
+	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
+	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
 EXPORT_SYMBOL(__pte2cachemode_tbl);
@@ -131,21 +135,7 @@
 
 int after_bootmem;
 
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
-				= 1
-#endif
-;
-
-static void __init init_gbpages(void)
-{
-#ifdef CONFIG_X86_64
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-#endif
-}
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
 struct map_range {
 	unsigned long start;
@@ -157,16 +147,12 @@
 
 static void __init probe_page_size_mask(void)
 {
-	init_gbpages();
-
 #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (direct_gbpages)
-		page_size_mask |= 1 << PG_LEVEL_1G;
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
@@ -179,6 +165,15 @@
 	if (cpu_has_pge) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
+	} else
+		__supported_pte_mask &= ~_PAGE_GLOBAL;
+
+	/* Enable 1 GB linear kernel mappings if available: */
+	if (direct_gbpages && cpu_has_gbpages) {
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	} else {
+		direct_gbpages = 0;
 	}
 }
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05a..3fba623 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -130,20 +130,6 @@
 	return 0;
 }
 
-static int __init parse_direct_gbpages_off(char *arg)
-{
-	direct_gbpages = 0;
-	return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
-	direct_gbpages = 1;
-	return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index cd4785b..4053bb5 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -482,9 +482,16 @@
 				  &memblock.reserved, mb->nid);
 	}
 
-	/* Mark all kernel nodes. */
+	/*
+	 * Mark all kernel nodes.
+	 *
+	 * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
+	 * may not include all the memblock.reserved memory ranges because
+	 * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+	 */
 	for_each_memblock(reserved, r)
-		node_set(r->nid, numa_kernel_nodes);
+		if (r->nid != MAX_NUMNODES)
+			node_set(r->nid, numa_kernel_nodes);
 
 	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 536ea2f..89af288 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -81,11 +81,9 @@
 	seq_printf(m, "DirectMap4M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
-#ifdef CONFIG_X86_64
 	if (direct_gbpages)
 		seq_printf(m, "DirectMap1G:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
 }
 #else
 static inline void split_page_count(int level) { }
@@ -1654,13 +1652,11 @@
 {
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7ac6869..35af677 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -610,7 +610,7 @@
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -628,8 +628,8 @@
 
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
-				current->comm, from, to - 1);
+			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+			       current->comm, from, to - 1);
 			return 0;
 		}
 		cursor += PAGE_SIZE;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7b22ada..5a7e525 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -275,12 +275,87 @@
 	}
 }
 
+/*
+ * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
+ * assumes that pgd should be in one page.
+ *
+ * But kernel with PAE paging that is not running as a Xen domain
+ * only needs to allocate 32 bytes for pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include <linux/slab.h>
+
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN	32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * When PAE kernel is running as a Xen domain, it does not use
+	 * shared kernel pmd. And this requires a whole page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return 0;
+
+	/*
+	 * when PAE kernel is not running as a Xen domain, it uses
+	 * shared kernel pmd. Shared kernel pmd does not require a whole
+	 * page for pgd. We are able to just allocate a 32-byte for pgd.
+	 * During boot time, we create a 32-byte slab for pgd table allocation.
+	 */
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+				      SLAB_PANIC, NULL);
+	if (!pgd_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+	/*
+	 * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
+	 * We allocate one page for pgd.
+	 */
+	if (!SHARED_KERNEL_PMD)
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+	/*
+	 * Now PAE kernel is not running as a Xen domain. We can allocate
+	 * a 32-byte slab for pgd to save memory space.
+	 */
+	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	if (!SHARED_KERNEL_PMD)
+		free_page((unsigned long)pgd);
+	else
+		kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+	return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = _pgd_alloc();
 
 	if (pgd == NULL)
 		goto out;
@@ -310,7 +385,7 @@
 out_free_pmds:
 	free_pmds(mm, pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 out:
 	return NULL;
 }
@@ -320,7 +395,7 @@
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	_pgd_free(pgd);
 }
 
 /*
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 5d04be5..4e664bd 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -111,7 +111,7 @@
 {
 	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
 			dump_trace(NULL, regs, (unsigned long *)stack, 0,
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2fb3847..8fd6f44 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -490,7 +490,9 @@
 	if (!bus) {
 		pci_free_resource_list(&resources);
 		kfree(sd);
+		return;
 	}
+	pci_bus_add_devices(bus);
 }
 
 void __init pcibios_set_cache_line_size(void)
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d143d21..d7f997f 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -67,7 +67,7 @@
 
 	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
 	if (!image) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       sizeof(bmp_header));
 		ioremapped = true;
 		if (!image) {
@@ -89,7 +89,7 @@
 	}
 
 	if (ioremapped) {
-		image = early_memremap(bgrt_tab->image_address,
+		image = early_ioremap(bgrt_tab->image_address,
 				       bmp_header.size);
 		if (!image) {
 			pr_err("Ignoring BGRT: failed to map image memory\n");
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index dbc8627..02744df 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -85,12 +85,20 @@
 	efi_memory_desc_t *virtual_map)
 {
 	efi_status_t status;
+	unsigned long flags;
+	pgd_t *save_pgd;
 
-	efi_call_phys_prolog();
+	save_pgd = efi_call_phys_prolog();
+
+	/* Disable interrupts around EFI calls: */
+	local_irq_save(flags);
 	status = efi_call_phys(efi_phys.set_virtual_address_map,
 			       memory_map_size, descriptor_size,
 			       descriptor_version, virtual_map);
-	efi_call_phys_epilog();
+	local_irq_restore(flags);
+
+	efi_call_phys_epilog(save_pgd);
+
 	return status;
 }
 
@@ -491,7 +499,8 @@
 	if (efi_memmap_init())
 		return;
 
-	print_efi_memmap();
+	if (efi_enabled(EFI_DBG))
+		print_efi_memmap();
 }
 
 void __init efi_late_init(void)
@@ -939,6 +948,8 @@
 {
 	if (parse_option_str(str, "old_map"))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
+	if (parse_option_str(str, "debug"))
+		set_bit(EFI_DBG, &efi.flags);
 
 	return 0;
 }
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e7cda..ed5b673 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,11 +33,10 @@
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prolog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
  */
-static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
@@ -57,21 +56,24 @@
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
 	struct desc_ptr gdt_descr;
+	pgd_t *save_pgd;
 
-	local_irq_save(efi_rt_eflags);
-
+	/* Current pgd is swapper_pg_dir, we'll restore it later: */
+	save_pgd = swapper_pg_dir;
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
 	gdt_descr.address = __pa(get_cpu_gdt_table(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
+
+	return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
@@ -79,10 +81,8 @@
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
-	load_cr3(swapper_pg_dir);
+	load_cr3(save_pgd);
 	__flush_tlb_all();
-
-	local_irq_restore(efi_rt_eflags);
 }
 
 void __init efi_runtime_mkexec(void)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 17e80d8..a0ac0f9 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,9 +41,6 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
-
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
  * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
@@ -78,17 +75,18 @@
 	}
 }
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
 	unsigned long vaddress;
+	pgd_t *save_pgd;
+
 	int pgd;
 	int n_pgds;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP))
-		return;
+		return NULL;
 
 	early_code_mapping_set_exec(1);
-	local_irq_save(efi_flags);
 
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
@@ -99,24 +97,29 @@
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
 	}
 	__flush_tlb_all();
+
+	return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	int pgd;
-	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+	int pgd_idx;
+	int nr_pgds;
 
-	if (!efi_enabled(EFI_OLD_MEMMAP))
+	if (!save_pgd)
 		return;
 
-	for (pgd = 0; pgd < n_pgds; pgd++)
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
 	kfree(save_pgd);
+
 	__flush_tlb_all();
-	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
 }
 
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index c9a0838..278e4da 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -11,6 +11,7 @@
  */
 
 #include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -101,6 +102,12 @@
 	}
 }
 
+static const struct x86_cpu_id imr_ids[] __initconst = {
+	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, imr_ids);
+
 /**
  * imr_self_test_init - entry point for IMR driver.
  *
@@ -108,7 +115,8 @@
  */
 static int __init imr_self_test_init(void)
 {
-	imr_self_test();
+	if (x86_match_cpu(imr_ids))
+		imr_self_test();
 	return 0;
 }
 
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 9a2e590..7fa8b3b 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -61,7 +61,7 @@
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
@@ -71,7 +71,7 @@
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 08e350e..5513084 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -83,7 +83,7 @@
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
@@ -93,7 +93,7 @@
 
 	if (psy) {
 		power_supply_changed(psy);
-		put_device(psy->dev);
+		power_supply_put(psy);
 	}
 }
 
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 9947985..3b6ec42 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -415,7 +415,7 @@
 	struct reset_args reset_args;
 
 	reset_args.sender = sender;
-	cpus_clear(*mask);
+	cpumask_clear(mask);
 	/* find a single cpu for each uvhub in this distribution mask */
 	maskbits = sizeof(struct pnmask) * BITSPERBYTE;
 	/* each bit is a pnode relative to the partition base pnode */
@@ -425,7 +425,7 @@
 			continue;
 		apnode = pnode + bcp->partition_base_pnode;
 		cpu = pnode_to_first_cpu(apnode, smaster);
-		cpu_set(cpu, *mask);
+		cpumask_set_cpu(cpu, mask);
 	}
 
 	/* IPI all cpus; preemption is already disabled */
@@ -1126,7 +1126,7 @@
 	/* don't actually do a shootdown of the local cpu */
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
-	if (cpu_isset(cpu, *cpumask))
+	if (cpumask_test_cpu(cpu, cpumask))
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3e32ed5..757678f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -134,7 +134,7 @@
 static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
 	struct desc_struct *desc = get_cpu_gdt_table(cpu);
 	tss_desc tss;
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index b3560ec..ef8187f 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -119,7 +119,7 @@
 110	i386	iopl			sys_iopl
 111	i386	vhangup			sys_vhangup
 112	i386	idle
-113	i386	vm86old			sys_vm86old			sys32_vm86_warning
+113	i386	vm86old			sys_vm86old			sys_ni_syscall
 114	i386	wait4			sys_wait4			compat_sys_wait4
 115	i386	swapoff			sys_swapoff
 116	i386	sysinfo			sys_sysinfo			compat_sys_sysinfo
@@ -172,7 +172,7 @@
 163	i386	mremap			sys_mremap
 164	i386	setresuid		sys_setresuid16
 165	i386	getresuid		sys_getresuid16
-166	i386	vm86			sys_vm86			sys32_vm86_warning
+166	i386	vm86			sys_vm86			sys_ni_syscall
 167	i386	query_module
 168	i386	poll			sys_poll
 169	i386	nfsservctl
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fb..9ef32d5 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -178,7 +178,7 @@
 169	common	reboot			sys_reboot
 170	common	sethostname		sys_sethostname
 171	common	setdomainname		sys_setdomainname
-172	common	iopl			stub_iopl
+172	common	iopl			sys_iopl
 173	common	ioperm			sys_ioperm
 174	64	create_module
 175	common	init_module		sys_init_module
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 2d7d9a1..8ffd214 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -64,8 +64,8 @@
  */
 static inline void rdtsc_barrier(void)
 {
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+			  "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 5cdfa9d..a75d8700 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -16,7 +16,7 @@
  */
 
 /* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
+#define sys_iopl sys_ni_syscall
 #define sys_ioperm sys_ni_syscall
 
 /*
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 7b9be98..275a3a8 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -51,7 +51,7 @@
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
 hostprogs-y			+= vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
@@ -206,4 +206,4 @@
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE
 
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64*
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322..40d2473 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@
 	cycle_t ret;
 	u64 last;
 	u32 version;
+	u32 migrate_count;
 	u8 flags;
 	unsigned cpu, cpu1;
 
 
 	/*
-	 * Note: hypervisor must guarantee that:
-	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-	 * 2. that per-CPU pvclock time info is updated if the
-	 *    underlying CPU changes.
-	 * 3. that version is increased whenever underlying CPU
-	 *    changes.
-	 *
+	 * When looping to get a consistent (time-info, tsc) pair, we
+	 * also need to deal with the possibility we can switch vcpus,
+	 * so make sure we always re-fetch time-info for the current vcpu.
 	 */
 	do {
 		cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -102,20 +99,27 @@
 		 * __getcpu() calls (Gleb).
 		 */
 
-		pvti = get_pvti(cpu);
+		/* Make sure migrate_count will change if we leave the VCPU. */
+		do {
+			pvti = get_pvti(cpu);
+			migrate_count = pvti->migrate_count;
+
+			cpu1 = cpu;
+			cpu = __getcpu() & VGETCPU_CPU_MASK;
+		} while (unlikely(cpu != cpu1));
 
 		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
 		/*
 		 * Test we're still on the cpu as well as the version.
-		 * We could have been migrated just after the first
-		 * vgetcpu but before fetching the version, so we
-		 * wouldn't notice a version change.
+		 * - We must read TSC of pvti's VCPU.
+		 * - KVM doesn't follow the versioning protocol, so data could
+		 *   change before version if we left the VCPU.
 		 */
-		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-	} while (unlikely(cpu != cpu1 ||
-			  (pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version));
+		smp_rmb();
+	} while (unlikely((pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version ||
+			  pvti->migrate_count != migrate_count));
 
 	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
 		*mode = VCLOCK_NONE;
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
index 5415b56..6b286bb 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/vdso/vdso32/syscall.S
@@ -19,8 +19,6 @@
 .Lpush_ebp:
 	movl	%ecx, %ebp
 	syscall
-	movl	$__USER32_DS, %ecx
-	movl	%ecx, %ss
 	movl	%ebp, %ecx
 	popl	%ebp
 .Lpop_ebp:
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5240f56..81665c9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -912,6 +912,7 @@
 	mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	tss->x86_tss.sp0 = thread->sp0;
 }
 
 static void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489..7413ee3 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -445,15 +445,7 @@
 {
 	int rc;
 
-	per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
-	irq_ctx_init(cpu);
-#else
-	clear_tsk_thread_flag(idle, TIF_FORK);
-#endif
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+	common_cpu_up(cpu, idle);
 
 	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
@@ -468,10 +460,6 @@
 	if (rc)
 		return rc;
 
-	if (num_online_cpus() == 1)
-		/* Just in case we booted with a single CPU. */
-		alternatives_enable_smp();
-
 	rc = xen_smp_intr_init(cpu);
 	if (rc)
 		return rc;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index c4df9db..d949769 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,5 +1,5 @@
 #include <linux/types.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -81,17 +81,14 @@
 
 static void xen_vcpu_notify_restore(void *data)
 {
-	unsigned long reason = (unsigned long)data;
-
 	/* Boot processor notified via generic timekeeping_resume() */
-	if ( smp_processor_id() == 0)
+	if (smp_processor_id() == 0)
 		return;
 
-	clockevents_notify(reason, NULL);
+	tick_resume_local();
 }
 
 void xen_arch_resume(void)
 {
-	on_each_cpu(xen_vcpu_notify_restore,
-		    (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+	on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 53adefd..985fc3e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,11 +68,11 @@
 	 * We're already on the usermode stack at this point, but
 	 * still with the kernel gs, so we can easily switch back
 	 */
-	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq %rsp, PER_CPU_VAR(rsp_scratch)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER_DS
-	pushq PER_CPU_VAR(old_rsp)
+	pushq PER_CPU_VAR(rsp_scratch)
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx
@@ -87,11 +87,11 @@
 	 * We're already on the usermode stack at this point, but
 	 * still with the kernel gs, so we can easily switch back
 	 */
-	movq %rsp, PER_CPU_VAR(old_rsp)
+	movq %rsp, PER_CPU_VAR(rsp_scratch)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER32_DS
-	pushq PER_CPU_VAR(old_rsp)
+	pushq PER_CPU_VAR(rsp_scratch)
 	pushq %r11
 	pushq $__USER32_CS
 	pushq %rcx
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 5b34033..b848cc3 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -174,7 +174,7 @@
 	struct pci_controller *pci_ctrl;
 	struct list_head resources;
 	struct pci_bus *bus;
-	int next_busno = 0;
+	int next_busno = 0, ret;
 
 	printk("PCI: Probing PCI hardware\n");
 
@@ -185,14 +185,25 @@
 		pci_controller_apertures(pci_ctrl, &resources);
 		bus = pci_scan_root_bus(NULL, pci_ctrl->first_busno,
 					pci_ctrl->ops, pci_ctrl, &resources);
+		if (!bus)
+			continue;
+
 		pci_ctrl->bus = bus;
 		pci_ctrl->last_busno = bus->busn_res.end;
 		if (next_busno <= pci_ctrl->last_busno)
 			next_busno = pci_ctrl->last_busno+1;
 	}
 	pci_bus_count = next_busno;
+	ret = platform_pcibios_fixup();
+	if (ret)
+		return ret;
 
-	return platform_pcibios_fixup();
+	for (pci_ctrl = pci_ctrl_head; pci_ctrl; pci_ctrl = pci_ctrl->next) {
+		if (pci_ctrl->bus)
+			pci_bus_add_devices(pci_ctrl->bus);
+	}
+
+	return 0;
 }
 
 subsys_initcall(pcibios_init);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b7b8933..33c4285 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1457,7 +1457,7 @@
 
 		do {
 			page = alloc_pages_node(set->numa_node,
-				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
 				this_order);
 			if (page)
 				break;
@@ -1479,8 +1479,6 @@
 		left -= to_do * rq_size;
 		for (j = 0; j < to_do; j++) {
 			tags->rqs[i] = p;
-			tags->rqs[i]->atomic_flags = 0;
-			tags->rqs[i]->cmd_flags = 0;
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
 						tags->rqs[i], hctx_idx, i,
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 36b0e61..bbcc2b5 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -95,13 +95,14 @@
 };
 
 struct acpi_ac {
-	struct power_supply charger;
+	struct power_supply *charger;
+	struct power_supply_desc charger_desc;
 	struct acpi_device * device;
 	unsigned long long state;
 	struct notifier_block battery_nb;
 };
 
-#define to_acpi_ac(x) container_of(x, struct acpi_ac, charger)
+#define to_acpi_ac(x) power_supply_get_drvdata(x)
 
 #ifdef CONFIG_ACPI_PROCFS_POWER
 static const struct file_operations acpi_ac_fops = {
@@ -275,7 +276,7 @@
 						  dev_name(&device->dev), event,
 						  (u32) ac->state);
 		acpi_notifier_call_chain(device, event, (u32) ac->state);
-		kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
+		kobject_uevent(&ac->charger->dev.kobj, KOBJ_CHANGE);
 	}
 
 	return;
@@ -321,6 +322,7 @@
 
 static int acpi_ac_add(struct acpi_device *device)
 {
+	struct power_supply_config psy_cfg = {};
 	int result = 0;
 	struct acpi_ac *ac = NULL;
 
@@ -341,19 +343,24 @@
 	if (result)
 		goto end;
 
-	ac->charger.name = acpi_device_bid(device);
+	psy_cfg.drv_data = ac;
+
+	ac->charger_desc.name = acpi_device_bid(device);
 #ifdef CONFIG_ACPI_PROCFS_POWER
 	result = acpi_ac_add_fs(ac);
 	if (result)
 		goto end;
 #endif
-	ac->charger.type = POWER_SUPPLY_TYPE_MAINS;
-	ac->charger.properties = ac_props;
-	ac->charger.num_properties = ARRAY_SIZE(ac_props);
-	ac->charger.get_property = get_ac_property;
-	result = power_supply_register(&ac->device->dev, &ac->charger);
-	if (result)
+	ac->charger_desc.type = POWER_SUPPLY_TYPE_MAINS;
+	ac->charger_desc.properties = ac_props;
+	ac->charger_desc.num_properties = ARRAY_SIZE(ac_props);
+	ac->charger_desc.get_property = get_ac_property;
+	ac->charger = power_supply_register(&ac->device->dev,
+					    &ac->charger_desc, &psy_cfg);
+	if (IS_ERR(ac->charger)) {
+		result = PTR_ERR(ac->charger);
 		goto end;
+	}
 
 	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
@@ -390,7 +397,7 @@
 	if (acpi_ac_get_state(ac))
 		return 0;
 	if (old_state != ac->state)
-		kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
+		kobject_uevent(&ac->charger->dev.kobj, KOBJ_CHANGE);
 	return 0;
 }
 #else
@@ -407,8 +414,7 @@
 
 	ac = acpi_driver_data(device);
 
-	if (ac->charger.dev)
-		power_supply_unregister(&ac->charger);
+	power_supply_unregister(ac->charger);
 	unregister_acpi_notifier(&ac->battery_nb);
 
 #ifdef CONFIG_ACPI_PROCFS_POWER
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index c7b105c..6bc9cbc 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -26,7 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/cpu.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <asm/mwait.h>
@@ -41,8 +41,6 @@
 
 static unsigned char tsc_detected_unstable;
 static unsigned char tsc_marked_unstable;
-static unsigned char lapic_detected_unstable;
-static unsigned char lapic_marked_unstable;
 
 static void power_saving_mwait_init(void)
 {
@@ -82,13 +80,10 @@
 		 */
 		if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			tsc_detected_unstable = 1;
-		if (!boot_cpu_has(X86_FEATURE_ARAT))
-			lapic_detected_unstable = 1;
 		break;
 	default:
-		/* TSC & LAPIC could halt in idle */
+		/* TSC could halt in idle */
 		tsc_detected_unstable = 1;
-		lapic_detected_unstable = 1;
 	}
 #endif
 }
@@ -155,7 +150,6 @@
 	sched_setscheduler(current, SCHED_RR, &param);
 
 	while (!kthread_should_stop()) {
-		int cpu;
 		unsigned long expire_time;
 
 		try_to_freeze();
@@ -177,28 +171,15 @@
 				mark_tsc_unstable("TSC halts in idle");
 				tsc_marked_unstable = 1;
 			}
-			if (lapic_detected_unstable && !lapic_marked_unstable) {
-				int i;
-				/* LAPIC could halt in idle, so notify users */
-				for_each_online_cpu(i)
-					clockevents_notify(
-						CLOCK_EVT_NOTIFY_BROADCAST_ON,
-						&i);
-				lapic_marked_unstable = 1;
-			}
 			local_irq_disable();
-			cpu = smp_processor_id();
-			if (lapic_marked_unstable)
-				clockevents_notify(
-					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+			tick_broadcast_enable();
+			tick_broadcast_enter();
 			stop_critical_timings();
 
 			mwait_idle_with_hints(power_saving_mwait_eax, 1);
 
 			start_critical_timings();
-			if (lapic_marked_unstable)
-				clockevents_notify(
-					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+			tick_broadcast_exit();
 			local_irq_enable();
 
 			if (time_before(expire_time, jiffies)) {
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index d98ba43..fdc16ce 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -117,7 +117,8 @@
 struct acpi_battery {
 	struct mutex lock;
 	struct mutex sysfs_lock;
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct acpi_device *device;
 	struct notifier_block pm_nb;
 	unsigned long update_time;
@@ -149,7 +150,7 @@
 	unsigned long flags;
 };
 
-#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat)
+#define to_acpi_battery(x) power_supply_get_drvdata(x)
 
 static inline int acpi_battery_present(struct acpi_battery *battery)
 {
@@ -608,40 +609,45 @@
 
 static int sysfs_add_battery(struct acpi_battery *battery)
 {
-	int result;
+	struct power_supply_config psy_cfg = { .drv_data = battery, };
 
 	if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
-		battery->bat.properties = charge_battery_props;
-		battery->bat.num_properties =
+		battery->bat_desc.properties = charge_battery_props;
+		battery->bat_desc.num_properties =
 			ARRAY_SIZE(charge_battery_props);
 	} else {
-		battery->bat.properties = energy_battery_props;
-		battery->bat.num_properties =
+		battery->bat_desc.properties = energy_battery_props;
+		battery->bat_desc.num_properties =
 			ARRAY_SIZE(energy_battery_props);
 	}
 
-	battery->bat.name = acpi_device_bid(battery->device);
-	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
-	battery->bat.get_property = acpi_battery_get_property;
+	battery->bat_desc.name = acpi_device_bid(battery->device);
+	battery->bat_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->bat_desc.get_property = acpi_battery_get_property;
 
-	result = power_supply_register_no_ws(&battery->device->dev, &battery->bat);
+	battery->bat = power_supply_register_no_ws(&battery->device->dev,
+				&battery->bat_desc, &psy_cfg);
 
-	if (result)
+	if (IS_ERR(battery->bat)) {
+		int result = PTR_ERR(battery->bat);
+
+		battery->bat = NULL;
 		return result;
-	return device_create_file(battery->bat.dev, &alarm_attr);
+	}
+	return device_create_file(&battery->bat->dev, &alarm_attr);
 }
 
 static void sysfs_remove_battery(struct acpi_battery *battery)
 {
 	mutex_lock(&battery->sysfs_lock);
-	if (!battery->bat.dev) {
+	if (!battery->bat) {
 		mutex_unlock(&battery->sysfs_lock);
 		return;
 	}
 
-	device_remove_file(battery->bat.dev, &alarm_attr);
-	power_supply_unregister(&battery->bat);
-	battery->bat.dev = NULL;
+	device_remove_file(&battery->bat->dev, &alarm_attr);
+	power_supply_unregister(battery->bat);
+	battery->bat = NULL;
 	mutex_unlock(&battery->sysfs_lock);
 }
 
@@ -738,7 +744,7 @@
 			return result;
 		acpi_battery_init_alarm(battery);
 	}
-	if (!battery->bat.dev) {
+	if (!battery->bat) {
 		result = sysfs_add_battery(battery);
 		if (result)
 			return result;
@@ -764,7 +770,7 @@
 {
 	int power_unit;
 
-	if (!battery->bat.dev)
+	if (!battery->bat)
 		return;
 
 	power_unit = battery->power_unit;
@@ -1062,11 +1068,11 @@
 static void acpi_battery_notify(struct acpi_device *device, u32 event)
 {
 	struct acpi_battery *battery = acpi_driver_data(device);
-	struct device *old;
+	struct power_supply *old;
 
 	if (!battery)
 		return;
-	old = battery->bat.dev;
+	old = battery->bat;
 	/*
 	* On Acer Aspire V5-573G notifications are sometimes triggered too
 	* early. For example, when AC is unplugged and notification is
@@ -1083,8 +1089,8 @@
 					acpi_battery_present(battery));
 	acpi_notifier_call_chain(device, event, acpi_battery_present(battery));
 	/* acpi_battery_update could remove power_supply object */
-	if (old && battery->bat.dev)
-		power_supply_changed(&battery->bat);
+	if (old && battery->bat)
+		power_supply_changed(battery->bat);
 }
 
 static int battery_notify(struct notifier_block *nb,
@@ -1100,7 +1106,7 @@
 		if (!acpi_battery_present(battery))
 			return 0;
 
-		if (!battery->bat.dev) {
+		if (battery->bat) {
 			result = acpi_battery_get_info(battery);
 			if (result)
 				return result;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 68a5f71..1b5569c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -423,8 +423,7 @@
 }
 EXPORT_SYMBOL(acpi_pci_osc_control_set);
 
-static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm,
-				 int *clear_aspm)
+static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm)
 {
 	u32 support, control, requested;
 	acpi_status status;
@@ -495,10 +494,12 @@
 		decode_osc_control(root, "OS now controls", control);
 		if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
 			/*
-			 * We have ASPM control, but the FADT indicates
-			 * that it's unsupported. Clear it.
+			 * We have ASPM control, but the FADT indicates that
+			 * it's unsupported. Leave existing configuration
+			 * intact and prevent the OS from touching it.
 			 */
-			*clear_aspm = 1;
+			dev_info(&device->dev, "FADT indicates ASPM is unsupported, using BIOS configuration\n");
+			*no_aspm = 1;
 		}
 	} else {
 		decode_osc_control(root, "OS requested", requested);
@@ -525,7 +526,7 @@
 	int result;
 	struct acpi_pci_root *root;
 	acpi_handle handle = device->handle;
-	int no_aspm = 0, clear_aspm = 0;
+	int no_aspm = 0;
 	bool hotadd = system_state != SYSTEM_BOOTING;
 
 	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
@@ -584,7 +585,7 @@
 
 	root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle);
 
-	negotiate_os_control(root, &no_aspm, &clear_aspm);
+	negotiate_os_control(root, &no_aspm);
 
 	/*
 	 * TBD: Need PCI interface for enumeration/configuration of roots.
@@ -607,10 +608,6 @@
 		goto remove_dmar;
 	}
 
-	if (clear_aspm) {
-		dev_info(&device->dev, "Disabling ASPM (FADT indicates it is unsupported)\n");
-		pcie_clear_aspm(root->bus);
-	}
 	if (no_aspm)
 		pcie_no_aspm();
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index c6bb9f1..39e0c8e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -32,7 +32,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/sched.h>       /* need_resched() */
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/syscore_ops.h>
 #include <acpi/processor.h>
@@ -157,12 +157,11 @@
 static void __lapic_timer_propagate_broadcast(void *arg)
 {
 	struct acpi_processor *pr = (struct acpi_processor *) arg;
-	unsigned long reason;
 
-	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
-		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
-
-	clockevents_notify(reason, &pr->id);
+	if (pr->power.timer_broadcast_on_state < INT_MAX)
+		tick_broadcast_enable();
+	else
+		tick_broadcast_disable();
 }
 
 static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
@@ -179,11 +178,10 @@
 	int state = cx - pr->power.states;
 
 	if (state >= pr->power.timer_broadcast_on_state) {
-		unsigned long reason;
-
-		reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
-			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
-		clockevents_notify(reason, &pr->id);
+		if (broadcast)
+			tick_broadcast_enter();
+		else
+			tick_broadcast_exit();
 	}
 }
 
@@ -922,7 +920,7 @@
 		return -EINVAL;
 
 	drv->safe_state_index = -1;
-	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+	for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) {
 		drv->states[i].name[0] = '\0';
 		drv->states[i].desc[0] = '\0';
 	}
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index a7a3edd..cd82762 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -74,7 +74,8 @@
 MODULE_DEVICE_TABLE(acpi, sbs_device_ids);
 
 struct acpi_battery {
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct acpi_sbs *sbs;
 	unsigned long update_time;
 	char name[8];
@@ -101,10 +102,10 @@
 	u8 have_sysfs_alarm:1;
 };
 
-#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat)
+#define to_acpi_battery(x) power_supply_get_drvdata(x)
 
 struct acpi_sbs {
-	struct power_supply charger;
+	struct power_supply *charger;
 	struct acpi_device *device;
 	struct acpi_smb_hc *hc;
 	struct mutex lock;
@@ -115,7 +116,7 @@
 	u8 charger_exists:1;
 };
 
-#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
+#define to_acpi_sbs(x) power_supply_get_drvdata(x)
 
 static int acpi_sbs_remove(struct acpi_device *device);
 static int acpi_battery_get_state(struct acpi_battery *battery);
@@ -303,6 +304,13 @@
 	POWER_SUPPLY_PROP_MANUFACTURER,
 };
 
+static const struct power_supply_desc acpi_sbs_charger_desc = {
+	.name		= "sbs-charger",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= sbs_ac_props,
+	.num_properties	= ARRAY_SIZE(sbs_ac_props),
+	.get_property	= sbs_get_ac_property,
+};
 
 /* --------------------------------------------------------------------------
                             Smart Battery System Management
@@ -519,6 +527,7 @@
 static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 {
 	struct acpi_battery *battery = &sbs->battery[id];
+	struct power_supply_config psy_cfg = { .drv_data = battery, };
 	int result;
 
 	battery->id = id;
@@ -528,23 +537,27 @@
 		return result;
 
 	sprintf(battery->name, ACPI_BATTERY_DIR_NAME, id);
-	battery->bat.name = battery->name;
-	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->bat_desc.name = battery->name;
+	battery->bat_desc.type = POWER_SUPPLY_TYPE_BATTERY;
 	if (!acpi_battery_mode(battery)) {
-		battery->bat.properties = sbs_charge_battery_props;
-		battery->bat.num_properties =
+		battery->bat_desc.properties = sbs_charge_battery_props;
+		battery->bat_desc.num_properties =
 		    ARRAY_SIZE(sbs_charge_battery_props);
 	} else {
-		battery->bat.properties = sbs_energy_battery_props;
-		battery->bat.num_properties =
+		battery->bat_desc.properties = sbs_energy_battery_props;
+		battery->bat_desc.num_properties =
 		    ARRAY_SIZE(sbs_energy_battery_props);
 	}
-	battery->bat.get_property = acpi_sbs_battery_get_property;
-	result = power_supply_register(&sbs->device->dev, &battery->bat);
-	if (result)
+	battery->bat_desc.get_property = acpi_sbs_battery_get_property;
+	battery->bat = power_supply_register(&sbs->device->dev,
+					&battery->bat_desc, &psy_cfg);
+	if (IS_ERR(battery->bat)) {
+		result = PTR_ERR(battery->bat);
+		battery->bat = NULL;
 		goto end;
+	}
 
-	result = device_create_file(battery->bat.dev, &alarm_attr);
+	result = device_create_file(&battery->bat->dev, &alarm_attr);
 	if (result)
 		goto end;
 	battery->have_sysfs_alarm = 1;
@@ -559,28 +572,29 @@
 {
 	struct acpi_battery *battery = &sbs->battery[id];
 
-	if (battery->bat.dev) {
+	if (battery->bat) {
 		if (battery->have_sysfs_alarm)
-			device_remove_file(battery->bat.dev, &alarm_attr);
-		power_supply_unregister(&battery->bat);
+			device_remove_file(&battery->bat->dev, &alarm_attr);
+		power_supply_unregister(battery->bat);
 	}
 }
 
 static int acpi_charger_add(struct acpi_sbs *sbs)
 {
 	int result;
+	struct power_supply_config psy_cfg = { .drv_data = sbs, };
 
 	result = acpi_ac_get_present(sbs);
 	if (result)
 		goto end;
 
 	sbs->charger_exists = 1;
-	sbs->charger.name = "sbs-charger";
-	sbs->charger.type = POWER_SUPPLY_TYPE_MAINS;
-	sbs->charger.properties = sbs_ac_props;
-	sbs->charger.num_properties = ARRAY_SIZE(sbs_ac_props);
-	sbs->charger.get_property = sbs_get_ac_property;
-	power_supply_register(&sbs->device->dev, &sbs->charger);
+	sbs->charger = power_supply_register(&sbs->device->dev,
+					&acpi_sbs_charger_desc, &psy_cfg);
+	if (IS_ERR(sbs->charger)) {
+		result = PTR_ERR(sbs->charger);
+		sbs->charger = NULL;
+	}
 	printk(KERN_INFO PREFIX "%s [%s]: AC Adapter [%s] (%s)\n",
 	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
 	       ACPI_AC_DIR_NAME, sbs->charger_present ? "on-line" : "off-line");
@@ -590,8 +604,8 @@
 
 static void acpi_charger_remove(struct acpi_sbs *sbs)
 {
-	if (sbs->charger.dev)
-		power_supply_unregister(&sbs->charger);
+	if (sbs->charger)
+		power_supply_unregister(sbs->charger);
 }
 
 static void acpi_sbs_callback(void *context)
@@ -605,7 +619,7 @@
 	if (sbs->charger_exists) {
 		acpi_ac_get_present(sbs);
 		if (sbs->charger_present != saved_charger_state)
-			kobject_uevent(&sbs->charger.dev->kobj, KOBJ_CHANGE);
+			kobject_uevent(&sbs->charger->dev.kobj, KOBJ_CHANGE);
 	}
 
 	if (sbs->manager_present) {
@@ -617,7 +631,7 @@
 			acpi_battery_read(bat);
 			if (saved_battery_state == bat->present)
 				continue;
-			kobject_uevent(&bat->bat.dev->kobj, KOBJ_CHANGE);
+			kobject_uevent(&bat->bat->dev.kobj, KOBJ_CHANGE);
 		}
 	}
 }
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index ae41107..b67e995 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -111,7 +111,8 @@
 # Should be last libata driver
 obj-$(CONFIG_PATA_LEGACY)	+= pata_legacy.o
 
-libata-y	:= libata-core.o libata-scsi.o libata-eh.o libata-transport.o
+libata-y	:= libata-core.o libata-scsi.o libata-eh.o \
+	libata-transport.o libata-trace.o
 libata-$(CONFIG_ATA_SFF)	+= libata-sff.o
 libata-$(CONFIG_SATA_PMP)	+= libata-pmp.o
 libata-$(CONFIG_ATA_ACPI)	+= libata-acpi.o
diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index c962886d..12489ce 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -181,10 +181,10 @@
 	int rc;
 
 	if (using_dac &&
-	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	    !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 			if (rc) {
 				dev_err(&pdev->dev,
 					   "64-bit DMA enable failed\n");
@@ -192,12 +192,12 @@
 			}
 		}
 	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 			return rc;
 		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev,
 				"32-bit consistent DMA enable failed\n");
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 33bb06e..c7a92a7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -738,10 +738,10 @@
 		return 0;
 
 	if (using_dac &&
-	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	    !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 			if (rc) {
 				dev_err(&pdev->dev,
 					"64-bit DMA enable failed\n");
@@ -749,12 +749,12 @@
 			}
 		}
 	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 			return rc;
 		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev,
 				"32-bit consistent DMA enable failed\n");
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index bc971af..ea0ff00 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -68,8 +68,6 @@
 		}
 	}
 
-	st_ahci_configure_oob(drv_data->hpriv->mmio);
-
 	if (drv_data->sw_rst) {
 		err = reset_control_deassert(drv_data->sw_rst);
 		if (err) {
@@ -172,6 +170,8 @@
 	if (err)
 		return err;
 
+	st_ahci_configure_oob(drv_data->hpriv->mmio);
+
 	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info,
 				      &ahci_platform_sht);
 	if (err) {
@@ -222,6 +222,8 @@
 		return err;
 	}
 
+	st_ahci_configure_oob(drv_data->hpriv->mmio);
+
 	return ahci_platform_resume_host(dev);
 }
 #endif
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index 2e8bb60..2b78510 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -22,6 +22,7 @@
  * NOTE: PM support is not currently available.
  *
  */
+#include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/ahci_platform.h>
@@ -718,6 +719,14 @@
 	return rc;
 }
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_ahci_acpi_match[] = {
+	{ "APMC0D0D", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, xgene_ahci_acpi_match);
+#endif
+
 static const struct of_device_id xgene_ahci_of_match[] = {
 	{.compatible = "apm,xgene-ahci"},
 	{},
@@ -730,6 +739,7 @@
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = xgene_ahci_of_match,
+		.acpi_match_table = ACPI_PTR(xgene_ahci_acpi_match),
 	},
 };
 
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 23dac3b..f6cb1f1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -70,6 +70,9 @@
 #include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/libata.h>
+
 #include "libata.h"
 #include "libata-transport.h"
 
@@ -691,11 +694,11 @@
  *	RETURNS:
  *	Block address read from @tf.
  */
-u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev)
+u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev)
 {
 	u64 block = 0;
 
-	if (tf->flags & ATA_TFLAG_LBA) {
+	if (!dev || tf->flags & ATA_TFLAG_LBA) {
 		if (tf->flags & ATA_TFLAG_LBA48) {
 			block |= (u64)tf->hob_lbah << 40;
 			block |= (u64)tf->hob_lbam << 32;
@@ -2144,6 +2147,24 @@
 	return 0;
 }
 
+static void ata_dev_config_sense_reporting(struct ata_device *dev)
+{
+	unsigned int err_mask;
+
+	if (!ata_id_has_sense_reporting(dev->id))
+		return;
+
+	if (ata_id_sense_reporting_enabled(dev->id))
+		return;
+
+	err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1);
+	if (err_mask) {
+		ata_dev_dbg(dev,
+			    "failed to enable Sense Data Reporting, Emask 0x%x\n",
+			    err_mask);
+	}
+}
+
 /**
  *	ata_dev_configure - Configure the specified ATA/ATAPI device
  *	@dev: Target device to configure
@@ -2366,7 +2387,7 @@
 					dev->devslp_timing[i] = sata_setting[j];
 				}
 		}
-
+		ata_dev_config_sense_reporting(dev);
 		dev->cdb_len = 16;
 	}
 
@@ -4897,6 +4918,7 @@
 		 */
 		if (unlikely(ata_tag_internal(qc->tag))) {
 			fill_result_tf(qc);
+			trace_ata_qc_complete_internal(qc);
 			__ata_qc_complete(qc);
 			return;
 		}
@@ -4907,6 +4929,7 @@
 		 */
 		if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
 			fill_result_tf(qc);
+			trace_ata_qc_complete_failed(qc);
 			ata_qc_schedule_eh(qc);
 			return;
 		}
@@ -4917,6 +4940,7 @@
 		if (qc->flags & ATA_QCFLAG_RESULT_TF)
 			fill_result_tf(qc);
 
+		trace_ata_qc_complete_done(qc);
 		/* Some commands need post-processing after successful
 		 * completion.
 		 */
@@ -5064,7 +5088,7 @@
 	}
 
 	ap->ops->qc_prep(qc);
-
+	trace_ata_qc_issue(qc);
 	qc->err_mask |= ap->ops->qc_issue(qc);
 	if (unlikely(qc->err_mask))
 		goto err;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index d2029a4..07f41be 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -46,6 +46,7 @@
 
 #include <linux/libata.h>
 
+#include <trace/events/libata.h>
 #include "libata.h"
 
 enum {
@@ -1510,13 +1511,18 @@
 	DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page);
 
 	ata_tf_init(dev, &tf);
-	tf.command = ATA_CMD_READ_LOG_EXT;
+	if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id)) {
+		tf.command = ATA_CMD_READ_LOG_DMA_EXT;
+		tf.protocol = ATA_PROT_DMA;
+	} else {
+		tf.command = ATA_CMD_READ_LOG_EXT;
+		tf.protocol = ATA_PROT_PIO;
+	}
 	tf.lbal = log;
 	tf.lbam = page;
 	tf.nsect = sectors;
 	tf.hob_nsect = sectors >> 8;
 	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
-	tf.protocol = ATA_PROT_PIO;
 
 	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
 				     buf, sectors * ATA_SECT_SIZE, 0);
@@ -1575,6 +1581,8 @@
 	tf->hob_lbah = buf[10];
 	tf->nsect = buf[12];
 	tf->hob_nsect = buf[13];
+	if (ata_id_has_ncq_autosense(dev->id))
+		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
 
 	return 0;
 }
@@ -1611,6 +1619,70 @@
 }
 
 /**
+ *	ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
+ *	@dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to
+ *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
+ *	@dfl_sense_key: default sense key to use
+ *
+ *	Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
+ *	SENSE.  This function is EH helper.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	encoded sense data on success, 0 on failure or if sense data
+ *	is not available.
+ */
+static u32 ata_eh_request_sense(struct ata_queued_cmd *qc,
+				struct scsi_cmnd *cmd)
+{
+	struct ata_device *dev = qc->dev;
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	if (!cmd)
+		return 0;
+
+	DPRINTK("ATA request sense\n");
+	ata_dev_warn(dev, "request sense\n");
+	if (!ata_id_sense_reporting_enabled(dev->id)) {
+		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
+		return 0;
+	}
+	ata_tf_init(dev, &tf);
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
+	tf.command = ATA_CMD_REQ_SENSE_DATA;
+	tf.protocol = ATA_PROT_NODATA;
+
+	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+	/*
+	 * ACS-4 states:
+	 * The device may set the SENSE DATA AVAILABLE bit to one in the
+	 * STATUS field and clear the ERROR bit to zero in the STATUS field
+	 * to indicate that the command returned completion without an error
+	 * and the sense data described in table 306 is available.
+	 *
+	 * IOW the 'ATA_SENSE' bit might not be set even though valid
+	 * sense data is available.
+	 * So check for both.
+	 */
+	if ((tf.command & ATA_SENSE) ||
+		tf.lbah != 0 || tf.lbam != 0 || tf.lbal != 0) {
+		ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal);
+		qc->flags |= ATA_QCFLAG_SENSE_VALID;
+		ata_dev_warn(dev, "sense data %02x/%02x/%02x\n",
+			     tf.lbah, tf.lbam, tf.lbal);
+	} else {
+		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
+			     tf.command, err_mask);
+	}
+	return err_mask;
+}
+
+/**
  *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
  *	@dev: device to perform REQUEST_SENSE to
  *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
@@ -1772,6 +1844,19 @@
 	memcpy(&qc->result_tf, &tf, sizeof(tf));
 	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
 	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
+	if (qc->result_tf.auxiliary) {
+		char sense_key, asc, ascq;
+
+		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
+		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
+		ascq = qc->result_tf.auxiliary & 0xff;
+		ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n",
+			    sense_key, asc, ascq);
+		ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq);
+		ata_scsi_set_sense_information(qc->scsicmd, &qc->result_tf);
+		qc->flags |= ATA_QCFLAG_SENSE_VALID;
+	}
+
 	ehc->i.err_mask &= ~AC_ERR_DEV;
 }
 
@@ -1801,6 +1886,27 @@
 		return ATA_EH_RESET;
 	}
 
+	/*
+	 * Sense data reporting does not work if the
+	 * device fault bit is set.
+	 */
+	if ((stat & ATA_SENSE) && !(stat & ATA_DF) &&
+	    !(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
+		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
+			tmp = ata_eh_request_sense(qc, qc->scsicmd);
+			if (tmp)
+				qc->err_mask |= tmp;
+			else
+				ata_scsi_set_sense_information(qc->scsicmd, tf);
+		} else {
+			ata_dev_warn(qc->dev, "sense data available but port frozen\n");
+		}
+	}
+
+	/* Set by NCQ autosense or request sense above */
+	if (qc->flags & ATA_QCFLAG_SENSE_VALID)
+		return 0;
+
 	if (stat & (ATA_ERR | ATA_DF))
 		qc->err_mask |= AC_ERR_DEV;
 	else
@@ -2186,6 +2292,7 @@
 		all_err_mask |= qc->err_mask;
 		if (qc->flags & ATA_QCFLAG_IO)
 			eflags |= ATA_EFLAG_IS_IO;
+		trace_ata_eh_link_autopsy_qc(qc);
 	}
 
 	/* enforce default EH actions */
@@ -2220,7 +2327,7 @@
 			eflags |= ATA_EFLAG_DUBIOUS_XFER;
 		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
 	}
-
+	trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
 	DPRINTK("EXIT\n");
 }
 
@@ -2289,27 +2396,27 @@
 		const char *text;
 	} cmd_descr[] = {
 		{ ATA_CMD_DEV_RESET,		"DEVICE RESET" },
-		{ ATA_CMD_CHK_POWER, 		"CHECK POWER MODE" },
-		{ ATA_CMD_STANDBY, 		"STANDBY" },
-		{ ATA_CMD_IDLE, 		"IDLE" },
-		{ ATA_CMD_EDD, 			"EXECUTE DEVICE DIAGNOSTIC" },
-		{ ATA_CMD_DOWNLOAD_MICRO,   	"DOWNLOAD MICROCODE" },
+		{ ATA_CMD_CHK_POWER,		"CHECK POWER MODE" },
+		{ ATA_CMD_STANDBY,		"STANDBY" },
+		{ ATA_CMD_IDLE,			"IDLE" },
+		{ ATA_CMD_EDD,			"EXECUTE DEVICE DIAGNOSTIC" },
+		{ ATA_CMD_DOWNLOAD_MICRO,	"DOWNLOAD MICROCODE" },
 		{ ATA_CMD_DOWNLOAD_MICRO_DMA,	"DOWNLOAD MICROCODE DMA" },
 		{ ATA_CMD_NOP,			"NOP" },
-		{ ATA_CMD_FLUSH, 		"FLUSH CACHE" },
-		{ ATA_CMD_FLUSH_EXT, 		"FLUSH CACHE EXT" },
-		{ ATA_CMD_ID_ATA,  		"IDENTIFY DEVICE" },
-		{ ATA_CMD_ID_ATAPI, 		"IDENTIFY PACKET DEVICE" },
-		{ ATA_CMD_SERVICE, 		"SERVICE" },
-		{ ATA_CMD_READ, 		"READ DMA" },
-		{ ATA_CMD_READ_EXT, 		"READ DMA EXT" },
-		{ ATA_CMD_READ_QUEUED, 		"READ DMA QUEUED" },
-		{ ATA_CMD_READ_STREAM_EXT, 	"READ STREAM EXT" },
+		{ ATA_CMD_FLUSH,		"FLUSH CACHE" },
+		{ ATA_CMD_FLUSH_EXT,		"FLUSH CACHE EXT" },
+		{ ATA_CMD_ID_ATA,		"IDENTIFY DEVICE" },
+		{ ATA_CMD_ID_ATAPI,		"IDENTIFY PACKET DEVICE" },
+		{ ATA_CMD_SERVICE,		"SERVICE" },
+		{ ATA_CMD_READ,			"READ DMA" },
+		{ ATA_CMD_READ_EXT,		"READ DMA EXT" },
+		{ ATA_CMD_READ_QUEUED,		"READ DMA QUEUED" },
+		{ ATA_CMD_READ_STREAM_EXT,	"READ STREAM EXT" },
 		{ ATA_CMD_READ_STREAM_DMA_EXT,  "READ STREAM DMA EXT" },
-		{ ATA_CMD_WRITE, 		"WRITE DMA" },
-		{ ATA_CMD_WRITE_EXT, 		"WRITE DMA EXT" },
-		{ ATA_CMD_WRITE_QUEUED, 	"WRITE DMA QUEUED EXT" },
-		{ ATA_CMD_WRITE_STREAM_EXT, 	"WRITE STREAM EXT" },
+		{ ATA_CMD_WRITE,		"WRITE DMA" },
+		{ ATA_CMD_WRITE_EXT,		"WRITE DMA EXT" },
+		{ ATA_CMD_WRITE_QUEUED,		"WRITE DMA QUEUED EXT" },
+		{ ATA_CMD_WRITE_STREAM_EXT,	"WRITE STREAM EXT" },
 		{ ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
 		{ ATA_CMD_WRITE_FUA_EXT,	"WRITE DMA FUA EXT" },
 		{ ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
@@ -2325,7 +2432,7 @@
 		{ ATA_CMD_READ_MULTI_EXT,	"READ MULTIPLE EXT" },
 		{ ATA_CMD_WRITE_MULTI,		"WRITE MULTIPLE" },
 		{ ATA_CMD_WRITE_MULTI_EXT,	"WRITE MULTIPLE EXT" },
-		{ ATA_CMD_WRITE_MULTI_FUA_EXT, 	"WRITE MULTIPLE FUA EXT" },
+		{ ATA_CMD_WRITE_MULTI_FUA_EXT,	"WRITE MULTIPLE FUA EXT" },
 		{ ATA_CMD_SET_FEATURES,		"SET FEATURES" },
 		{ ATA_CMD_SET_MULTI,		"SET MULTIPLE MODE" },
 		{ ATA_CMD_VERIFY,		"READ VERIFY SECTOR(S)" },
@@ -2342,12 +2449,12 @@
 		{ ATA_CMD_READ_LOG_EXT,		"READ LOG EXT" },
 		{ ATA_CMD_WRITE_LOG_EXT,	"WRITE LOG EXT" },
 		{ ATA_CMD_READ_LOG_DMA_EXT,	"READ LOG DMA EXT" },
-		{ ATA_CMD_WRITE_LOG_DMA_EXT, 	"WRITE LOG DMA EXT" },
+		{ ATA_CMD_WRITE_LOG_DMA_EXT,	"WRITE LOG DMA EXT" },
 		{ ATA_CMD_TRUSTED_NONDATA,	"TRUSTED NON-DATA" },
 		{ ATA_CMD_TRUSTED_RCV,		"TRUSTED RECEIVE" },
-		{ ATA_CMD_TRUSTED_RCV_DMA, 	"TRUSTED RECEIVE DMA" },
+		{ ATA_CMD_TRUSTED_RCV_DMA,	"TRUSTED RECEIVE DMA" },
 		{ ATA_CMD_TRUSTED_SND,		"TRUSTED SEND" },
-		{ ATA_CMD_TRUSTED_SND_DMA, 	"TRUSTED SEND DMA" },
+		{ ATA_CMD_TRUSTED_SND_DMA,	"TRUSTED SEND DMA" },
 		{ ATA_CMD_PMP_READ,		"READ BUFFER" },
 		{ ATA_CMD_PMP_READ_DMA,		"READ BUFFER DMA" },
 		{ ATA_CMD_PMP_WRITE,		"WRITE BUFFER" },
@@ -2364,12 +2471,12 @@
 		{ ATA_CMD_MEDIA_LOCK,		"DOOR LOCK" },
 		{ ATA_CMD_MEDIA_UNLOCK,		"DOOR UNLOCK" },
 		{ ATA_CMD_DSM,			"DATA SET MANAGEMENT" },
-		{ ATA_CMD_CHK_MED_CRD_TYP, 	"CHECK MEDIA CARD TYPE" },
-		{ ATA_CMD_CFA_REQ_EXT_ERR, 	"CFA REQUEST EXTENDED ERROR" },
+		{ ATA_CMD_CHK_MED_CRD_TYP,	"CHECK MEDIA CARD TYPE" },
+		{ ATA_CMD_CFA_REQ_EXT_ERR,	"CFA REQUEST EXTENDED ERROR" },
 		{ ATA_CMD_CFA_WRITE_NE,		"CFA WRITE SECTORS WITHOUT ERASE" },
 		{ ATA_CMD_CFA_TRANS_SECT,	"CFA TRANSLATE SECTOR" },
 		{ ATA_CMD_CFA_ERASE,		"CFA ERASE SECTORS" },
-		{ ATA_CMD_CFA_WRITE_MULT_NE, 	"CFA WRITE MULTIPLE WITHOUT ERASE" },
+		{ ATA_CMD_CFA_WRITE_MULT_NE,	"CFA WRITE MULTIPLE WITHOUT ERASE" },
 		{ ATA_CMD_REQ_SENSE_DATA,	"REQUEST SENSE DATA EXT" },
 		{ ATA_CMD_SANITIZE_DEVICE,	"SANITIZE DEVICE" },
 		{ ATA_CMD_READ_LONG,		"READ LONG (with retries)" },
@@ -2543,14 +2650,15 @@
 
 #ifdef CONFIG_ATA_VERBOSE_ERROR
 		if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
-				    ATA_ERR)) {
+				    ATA_SENSE | ATA_ERR)) {
 			if (res->command & ATA_BUSY)
 				ata_dev_err(qc->dev, "status: { Busy }\n");
 			else
-				ata_dev_err(qc->dev, "status: { %s%s%s%s}\n",
+				ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
 				  res->command & ATA_DRDY ? "DRDY " : "",
 				  res->command & ATA_DF ? "DF " : "",
 				  res->command & ATA_DRQ ? "DRQ " : "",
+				  res->command & ATA_SENSE ? "SENSE " : "",
 				  res->command & ATA_ERR ? "ERR " : "");
 		}
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b061ba2..3131adc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -270,13 +270,28 @@
 	    ata_scsi_park_show, ata_scsi_park_store);
 EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
 
-static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
+void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
 {
+	if (!cmd)
+		return;
+
 	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
 
 	scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq);
 }
 
+void ata_scsi_set_sense_information(struct scsi_cmnd *cmd,
+				    const struct ata_taskfile *tf)
+{
+	u64 information;
+
+	if (!cmd)
+		return;
+
+	information = ata_tf_read_block(tf, NULL);
+	scsi_set_sense_information(cmd->sense_buffer, information);
+}
+
 static ssize_t
 ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t count)
@@ -799,26 +814,27 @@
 	if (stat & ATA_BUSY) {
 		printk("Busy }\n");	/* Data is not valid in this case */
 	} else {
-		if (stat & 0x40)	printk("DriveReady ");
-		if (stat & 0x20)	printk("DeviceFault ");
-		if (stat & 0x10)	printk("SeekComplete ");
-		if (stat & 0x08)	printk("DataRequest ");
-		if (stat & 0x04)	printk("CorrectedError ");
-		if (stat & 0x02)	printk("Index ");
-		if (stat & 0x01)	printk("Error ");
+		if (stat & ATA_DRDY)	printk("DriveReady ");
+		if (stat & ATA_DF)	printk("DeviceFault ");
+		if (stat & ATA_DSC)	printk("SeekComplete ");
+		if (stat & ATA_DRQ)	printk("DataRequest ");
+		if (stat & ATA_CORR)	printk("CorrectedError ");
+		if (stat & ATA_SENSE)	printk("Sense ");
+		if (stat & ATA_ERR)	printk("Error ");
 		printk("}\n");
 
 		if (err) {
 			printk(KERN_WARNING "ata%u: error=0x%02x { ", id, err);
-			if (err & 0x04)		printk("DriveStatusError ");
-			if (err & 0x80) {
-				if (err & 0x04)	printk("BadCRC ");
+			if (err & ATA_ABORTED)	printk("DriveStatusError ");
+			if (err & ATA_ICRC) {
+				if (err & ATA_ABORTED)
+						printk("BadCRC ");
 				else		printk("Sector ");
 			}
-			if (err & 0x40)		printk("UncorrectableError ");
-			if (err & 0x10)		printk("SectorIdNotFound ");
-			if (err & 0x02)		printk("TrackZeroNotFound ");
-			if (err & 0x01)		printk("AddrMarkNotFound ");
+			if (err & ATA_UNC)	printk("UncorrectableError ");
+			if (err & ATA_IDNF)	printk("SectorIdNotFound ");
+			if (err & ATA_TRK0NF)	printk("TrackZeroNotFound ");
+			if (err & ATA_AMNF)	printk("AddrMarkNotFound ");
 			printk("}\n");
 		}
 	}
@@ -849,40 +865,59 @@
 	/* Based on the 3ware driver translation table */
 	static const unsigned char sense_table[][4] = {
 		/* BBD|ECC|ID|MAR */
-		{0xd1, 		ABORTED_COMMAND, 0x00, 0x00}, 	// Device busy                  Aborted command
+		{0xd1,		ABORTED_COMMAND, 0x00, 0x00},
+			// Device busy                  Aborted command
 		/* BBD|ECC|ID */
-		{0xd0,  	ABORTED_COMMAND, 0x00, 0x00}, 	// Device busy                  Aborted command
+		{0xd0,		ABORTED_COMMAND, 0x00, 0x00},
+			// Device busy                  Aborted command
 		/* ECC|MC|MARK */
-		{0x61, 		HARDWARE_ERROR, 0x00, 0x00}, 	// Device fault                 Hardware error
+		{0x61,		HARDWARE_ERROR, 0x00, 0x00},
+			// Device fault                 Hardware error
 		/* ICRC|ABRT */		/* NB: ICRC & !ABRT is BBD */
-		{0x84, 		ABORTED_COMMAND, 0x47, 0x00}, 	// Data CRC error               SCSI parity error
+		{0x84,		ABORTED_COMMAND, 0x47, 0x00},
+			// Data CRC error               SCSI parity error
 		/* MC|ID|ABRT|TRK0|MARK */
-		{0x37, 		NOT_READY, 0x04, 0x00}, 	// Unit offline                 Not ready
+		{0x37,		NOT_READY, 0x04, 0x00},
+			// Unit offline                 Not ready
 		/* MCR|MARK */
-		{0x09, 		NOT_READY, 0x04, 0x00}, 	// Unrecovered disk error       Not ready
+		{0x09,		NOT_READY, 0x04, 0x00},
+			// Unrecovered disk error       Not ready
 		/*  Bad address mark */
-		{0x01, 		MEDIUM_ERROR, 0x13, 0x00}, 	// Address mark not found       Address mark not found for data field
-		/* TRK0 */
-		{0x02, 		HARDWARE_ERROR, 0x00, 0x00}, 	// Track 0 not found		Hardware error
+		{0x01,		MEDIUM_ERROR, 0x13, 0x00},
+			// Address mark not found for data field
+		/* TRK0 - Track 0 not found */
+		{0x02,		HARDWARE_ERROR, 0x00, 0x00},
+			// Hardware error
 		/* Abort: 0x04 is not translated here, see below */
 		/* Media change request */
-		{0x08, 		NOT_READY, 0x04, 0x00}, 	// Media change request	  FIXME: faking offline
-		/* SRV/IDNF */
-		{0x10, 		ILLEGAL_REQUEST, 0x21, 0x00}, 	// ID not found                 Logical address out of range
-		/* MC */
-		{0x20, 		UNIT_ATTENTION, 0x28, 0x00}, 	// Media Changed		Not ready to ready change, medium may have changed
-		/* ECC */
-		{0x40, 		MEDIUM_ERROR, 0x11, 0x04}, 	// Uncorrectable ECC error      Unrecovered read error
+		{0x08,		NOT_READY, 0x04, 0x00},
+			// FIXME: faking offline
+		/* SRV/IDNF - ID not found */
+		{0x10,		ILLEGAL_REQUEST, 0x21, 0x00},
+			// Logical address out of range
+		/* MC - Media Changed */
+		{0x20,		UNIT_ATTENTION, 0x28, 0x00},
+			// Not ready to ready change, medium may have changed
+		/* ECC - Uncorrectable ECC error */
+		{0x40,		MEDIUM_ERROR, 0x11, 0x04},
+			// Unrecovered read error
 		/* BBD - block marked bad */
-		{0x80, 		MEDIUM_ERROR, 0x11, 0x04}, 	// Block marked bad		Medium error, unrecovered read error
+		{0x80,		MEDIUM_ERROR, 0x11, 0x04},
+			// Block marked bad	Medium error, unrecovered read error
 		{0xFF, 0xFF, 0xFF, 0xFF}, // END mark
 	};
 	static const unsigned char stat_table[][4] = {
 		/* Must be first because BUSY means no other bits valid */
-		{0x80, 		ABORTED_COMMAND, 0x47, 0x00},	// Busy, fake parity for now
-		{0x20, 		HARDWARE_ERROR,  0x44, 0x00}, 	// Device fault, internal target failure
-		{0x08, 		ABORTED_COMMAND, 0x47, 0x00},	// Timed out in xfer, fake parity for now
-		{0x04, 		RECOVERED_ERROR, 0x11, 0x00},	// Recovered ECC error	  Medium error, recovered
+		{0x80,		ABORTED_COMMAND, 0x47, 0x00},
+		// Busy, fake parity for now
+		{0x40,		ILLEGAL_REQUEST, 0x21, 0x04},
+		// Device ready, unaligned write command
+		{0x20,		HARDWARE_ERROR,  0x44, 0x00},
+		// Device fault, internal target failure
+		{0x08,		ABORTED_COMMAND, 0x47, 0x00},
+		// Timed out in xfer, fake parity for now
+		{0x04,		RECOVERED_ERROR, 0x11, 0x00},
+		// Recovered ECC error	  Medium error, recovered
 		{0xFF, 0xFF, 0xFF, 0xFF}, // END mark
 	};
 
@@ -1757,7 +1792,9 @@
 	    ((cdb[2] & 0x20) || need_sense)) {
 		ata_gen_passthru_sense(qc);
 	} else {
-		if (!need_sense) {
+		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
+			cmd->result = SAM_STAT_CHECK_CONDITION;
+		} else if (!need_sense) {
 			cmd->result = SAM_STAT_GOOD;
 		} else {
 			/* TODO: decide which descriptor format to use
@@ -4240,10 +4277,7 @@
 	unsigned int i, tag;
 
 	for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
-		if (ap->flags & ATA_FLAG_LOWTAG)
-			tag = 1;
-		else
-			tag = tag < max_queue ? tag : 0;
+		tag = tag < max_queue ? tag : 0;
 
 		/* the last tag is reserved for internal command. */
 		if (tag == ATA_TAG_INTERNAL)
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2e86e3b..cdf6215 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -3220,11 +3220,11 @@
 	 * ->sff_irq_clear method.  Try to initialize bmdma_addr
 	 * regardless of dma masks.
 	 */
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		ata_bmdma_nodma(host, "failed to set dma mask");
 	if (!rc) {
-		rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+		rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 		if (rc)
 			ata_bmdma_nodma(host,
 					"failed to set consistent dma mask");
diff --git a/drivers/ata/libata-trace.c b/drivers/ata/libata-trace.c
new file mode 100644
index 0000000..fd30b8c
--- /dev/null
+++ b/drivers/ata/libata-trace.c
@@ -0,0 +1,151 @@
+/*
+ * libata-trace.c - trace functions for libata
+ *
+ * Copyright 2015 Hannes Reinecke
+ * Copyright 2015 SUSE Linux GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/trace_seq.h>
+#include <trace/events/libata.h>
+
+const char *
+libata_trace_parse_status(struct trace_seq *p, unsigned char status)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "{ ");
+	if (status & ATA_BUSY)
+		trace_seq_printf(p, "BUSY ");
+	if (status & ATA_DRDY)
+		trace_seq_printf(p, "DRDY ");
+	if (status & ATA_DF)
+		trace_seq_printf(p, "DF ");
+	if (status & ATA_DSC)
+		trace_seq_printf(p, "DSC ");
+	if (status & ATA_DRQ)
+		trace_seq_printf(p, "DRQ ");
+	if (status & ATA_CORR)
+		trace_seq_printf(p, "CORR ");
+	if (status & ATA_SENSE)
+		trace_seq_printf(p, "SENSE ");
+	if (status & ATA_ERR)
+		trace_seq_printf(p, "ERR ");
+	trace_seq_putc(p, '}');
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+const char *
+libata_trace_parse_eh_action(struct trace_seq *p, unsigned int eh_action)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "%x", eh_action);
+	if (eh_action) {
+		trace_seq_printf(p, "{ ");
+		if (eh_action & ATA_EH_REVALIDATE)
+			trace_seq_printf(p, "REVALIDATE ");
+		if (eh_action & (ATA_EH_SOFTRESET | ATA_EH_HARDRESET))
+			trace_seq_printf(p, "RESET ");
+		else if (eh_action & ATA_EH_SOFTRESET)
+			trace_seq_printf(p, "SOFTRESET ");
+		else if (eh_action & ATA_EH_HARDRESET)
+			trace_seq_printf(p, "HARDRESET ");
+		if (eh_action & ATA_EH_ENABLE_LINK)
+			trace_seq_printf(p, "ENABLE_LINK ");
+		if (eh_action & ATA_EH_PARK)
+			trace_seq_printf(p, "PARK ");
+		trace_seq_putc(p, '}');
+	}
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+const char *
+libata_trace_parse_eh_err_mask(struct trace_seq *p, unsigned int eh_err_mask)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "%x", eh_err_mask);
+	if (eh_err_mask) {
+		trace_seq_printf(p, "{ ");
+		if (eh_err_mask & AC_ERR_DEV)
+			trace_seq_printf(p, "DEV ");
+		if (eh_err_mask & AC_ERR_HSM)
+			trace_seq_printf(p, "HSM ");
+		if (eh_err_mask & AC_ERR_TIMEOUT)
+			trace_seq_printf(p, "TIMEOUT ");
+		if (eh_err_mask & AC_ERR_MEDIA)
+			trace_seq_printf(p, "MEDIA ");
+		if (eh_err_mask & AC_ERR_ATA_BUS)
+			trace_seq_printf(p, "ATA_BUS ");
+		if (eh_err_mask & AC_ERR_HOST_BUS)
+			trace_seq_printf(p, "HOST_BUS ");
+		if (eh_err_mask & AC_ERR_SYSTEM)
+			trace_seq_printf(p, "SYSTEM ");
+		if (eh_err_mask & AC_ERR_INVALID)
+			trace_seq_printf(p, "INVALID ");
+		if (eh_err_mask & AC_ERR_OTHER)
+			trace_seq_printf(p, "OTHER ");
+		if (eh_err_mask & AC_ERR_NODEV_HINT)
+			trace_seq_printf(p, "NODEV_HINT ");
+		if (eh_err_mask & AC_ERR_NCQ)
+			trace_seq_printf(p, "NCQ ");
+		trace_seq_putc(p, '}');
+	}
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+const char *
+libata_trace_parse_qc_flags(struct trace_seq *p, unsigned int qc_flags)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "%x", qc_flags);
+	if (qc_flags) {
+		trace_seq_printf(p, "{ ");
+		if (qc_flags & ATA_QCFLAG_ACTIVE)
+			trace_seq_printf(p, "ACTIVE ");
+		if (qc_flags & ATA_QCFLAG_DMAMAP)
+			trace_seq_printf(p, "DMAMAP ");
+		if (qc_flags & ATA_QCFLAG_IO)
+			trace_seq_printf(p, "IO ");
+		if (qc_flags & ATA_QCFLAG_RESULT_TF)
+			trace_seq_printf(p, "RESULT_TF ");
+		if (qc_flags & ATA_QCFLAG_CLEAR_EXCL)
+			trace_seq_printf(p, "CLEAR_EXCL ");
+		if (qc_flags & ATA_QCFLAG_QUIET)
+			trace_seq_printf(p, "QUIET ");
+		if (qc_flags & ATA_QCFLAG_RETRY)
+			trace_seq_printf(p, "RETRY ");
+		if (qc_flags & ATA_QCFLAG_FAILED)
+			trace_seq_printf(p, "FAILED ");
+		if (qc_flags & ATA_QCFLAG_SENSE_VALID)
+			trace_seq_printf(p, "SENSE_VALID ");
+		if (qc_flags & ATA_QCFLAG_EH_SCHEDULED)
+			trace_seq_printf(p, "EH_SCHEDULED ");
+		trace_seq_putc(p, '}');
+	}
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index f840ca1..a998a17 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -67,7 +67,8 @@
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
-extern u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev);
+extern u64 ata_tf_read_block(const struct ata_taskfile *tf,
+			     struct ata_device *dev);
 extern unsigned ata_exec_internal(struct ata_device *dev,
 				  struct ata_taskfile *tf, const u8 *cdb,
 				  int dma_dir, void *buf, unsigned int buflen,
@@ -137,6 +138,9 @@
 			      struct scsi_host_template *sht);
 extern void ata_scsi_scan_host(struct ata_port *ap, int sync);
 extern int ata_scsi_offline_dev(struct ata_device *dev);
+extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq);
+extern void ata_scsi_set_sense_information(struct scsi_cmnd *cmd,
+					   const struct ata_taskfile *tf);
 extern void ata_scsi_media_change_notify(struct ata_device *dev);
 extern void ata_scsi_hotplug(struct work_struct *work);
 extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c
index a705cfc..3ea50dc 100644
--- a/drivers/ata/pata_atp867x.c
+++ b/drivers/ata/pata_atp867x.c
@@ -475,11 +475,11 @@
 
 	atp867x_fixup(host);
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	return rc;
 }
 
diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index d65cb9d..4cb2407 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c
@@ -164,11 +164,11 @@
 		return -ENODEV;
 	}
 
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
 		printk(KERN_ERR DRV_NAME ": unable to configure DMA mask.\n");
 		return -ENODEV;
 	}
-	if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
+	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) {
 		printk(KERN_ERR DRV_NAME ": unable to configure consistent DMA mask.\n");
 		return -ENODEV;
 	}
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index d019cdd..b2fc023 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -221,10 +221,10 @@
 	if (rc)
 		return rc;
 	host->iomap = pcim_iomap_table(pdev);
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index efb272d..633aa29 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c
@@ -122,10 +122,10 @@
 		return rc;
 
 	host->iomap = pcim_iomap_table(dev);
-	rc = pci_set_dma_mask(dev, ATA_DMA_MASK);
+	rc = dma_set_mask(&dev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(dev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&dev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 	pci_set_master(dev);
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index dca8251..d9ef9e27 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -730,11 +730,11 @@
 		return rc;
 	host->iomap = pcim_iomap_table(pdev);
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 7f4cb76..5cd60d6 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -1029,10 +1029,10 @@
 	if (rc)
 		return rc;
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index f597edc..c14071b 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -374,10 +374,10 @@
 	host->iomap = pcim_iomap_table(pdev);
 
 	/* Setup DMA masks */
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 	pci_set_master(pdev);
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index f10631b..64d682c 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -593,12 +593,12 @@
 {
 	int rc;
 
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc) {
 		dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 		return rc;
 	}
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc) {
 		dev_err(&pdev->dev, "32-bit consistent DMA enable failed\n");
 		return rc;
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index fdb0f28..9020349 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -36,11 +36,16 @@
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/slab.h>
+
 #include "libata.h"
 
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_cmnd.h>
 
+/* Supported DMA engine drivers */
+#include <linux/platform_data/dma-dw.h>
+#include <linux/dma/dw.h>
+
 /* These two are defined in "libata.h" */
 #undef	DRV_NAME
 #undef	DRV_VERSION
@@ -60,153 +65,9 @@
 #define NO_IRQ		0
 #endif
 
-/* SATA DMA driver Globals */
-#define DMA_NUM_CHANS		1
-#define DMA_NUM_CHAN_REGS	8
-
-/* SATA DMA Register definitions */
 #define AHB_DMA_BRST_DFLT	64	/* 16 data items burst length*/
 
-struct dmareg {
-	u32 low;		/* Low bits 0-31 */
-	u32 high;		/* High bits 32-63 */
-};
-
-/* DMA Per Channel registers */
-struct dma_chan_regs {
-	struct dmareg sar;	/* Source Address */
-	struct dmareg dar;	/* Destination address */
-	struct dmareg llp;	/* Linked List Pointer */
-	struct dmareg ctl;	/* Control */
-	struct dmareg sstat;	/* Source Status not implemented in core */
-	struct dmareg dstat;	/* Destination Status not implemented in core*/
-	struct dmareg sstatar;	/* Source Status Address not impl in core */
-	struct dmareg dstatar;	/* Destination Status Address not implemente */
-	struct dmareg cfg;	/* Config */
-	struct dmareg sgr;	/* Source Gather */
-	struct dmareg dsr;	/* Destination Scatter */
-};
-
-/* Generic Interrupt Registers */
-struct dma_interrupt_regs {
-	struct dmareg tfr;	/* Transfer Interrupt */
-	struct dmareg block;	/* Block Interrupt */
-	struct dmareg srctran;	/* Source Transfer Interrupt */
-	struct dmareg dsttran;	/* Dest Transfer Interrupt */
-	struct dmareg error;	/* Error */
-};
-
-struct ahb_dma_regs {
-	struct dma_chan_regs	chan_regs[DMA_NUM_CHAN_REGS];
-	struct dma_interrupt_regs interrupt_raw;	/* Raw Interrupt */
-	struct dma_interrupt_regs interrupt_status;	/* Interrupt Status */
-	struct dma_interrupt_regs interrupt_mask;	/* Interrupt Mask */
-	struct dma_interrupt_regs interrupt_clear;	/* Interrupt Clear */
-	struct dmareg		statusInt;	/* Interrupt combined*/
-	struct dmareg		rq_srcreg;	/* Src Trans Req */
-	struct dmareg		rq_dstreg;	/* Dst Trans Req */
-	struct dmareg		rq_sgl_srcreg;	/* Sngl Src Trans Req*/
-	struct dmareg		rq_sgl_dstreg;	/* Sngl Dst Trans Req*/
-	struct dmareg		rq_lst_srcreg;	/* Last Src Trans Req*/
-	struct dmareg		rq_lst_dstreg;	/* Last Dst Trans Req*/
-	struct dmareg		dma_cfg;		/* DMA Config */
-	struct dmareg		dma_chan_en;		/* DMA Channel Enable*/
-	struct dmareg		dma_id;			/* DMA ID */
-	struct dmareg		dma_test;		/* DMA Test */
-	struct dmareg		res1;			/* reserved */
-	struct dmareg		res2;			/* reserved */
-	/*
-	 * DMA Comp Params
-	 * Param 6 = dma_param[0], Param 5 = dma_param[1],
-	 * Param 4 = dma_param[2] ...
-	 */
-	struct dmareg		dma_params[6];
-};
-
-/* Data structure for linked list item */
-struct lli {
-	u32		sar;		/* Source Address */
-	u32		dar;		/* Destination address */
-	u32		llp;		/* Linked List Pointer */
-	struct dmareg	ctl;		/* Control */
-	struct dmareg	dstat;		/* Destination Status */
-};
-
 enum {
-	SATA_DWC_DMAC_LLI_SZ =	(sizeof(struct lli)),
-	SATA_DWC_DMAC_LLI_NUM =	256,
-	SATA_DWC_DMAC_LLI_TBL_SZ = (SATA_DWC_DMAC_LLI_SZ * \
-					SATA_DWC_DMAC_LLI_NUM),
-	SATA_DWC_DMAC_TWIDTH_BYTES = 4,
-	SATA_DWC_DMAC_CTRL_TSIZE_MAX = (0x00000800 * \
-						SATA_DWC_DMAC_TWIDTH_BYTES),
-};
-
-/* DMA Register Operation Bits */
-enum {
-	DMA_EN	=		0x00000001, /* Enable AHB DMA */
-	DMA_CTL_LLP_SRCEN =	0x10000000, /* Blk chain enable Src */
-	DMA_CTL_LLP_DSTEN =	0x08000000, /* Blk chain enable Dst */
-};
-
-#define	DMA_CTL_BLK_TS(size)	((size) & 0x000000FFF)	/* Blk Transfer size */
-#define DMA_CHANNEL(ch)		(0x00000001 << (ch))	/* Select channel */
-	/* Enable channel */
-#define	DMA_ENABLE_CHAN(ch)	((0x00000001 << (ch)) |			\
-				 ((0x000000001 << (ch)) << 8))
-	/* Disable channel */
-#define	DMA_DISABLE_CHAN(ch)	(0x00000000 | ((0x000000001 << (ch)) << 8))
-	/* Transfer Type & Flow Controller */
-#define	DMA_CTL_TTFC(type)	(((type) & 0x7) << 20)
-#define	DMA_CTL_SMS(num)	(((num) & 0x3) << 25) /* Src Master Select */
-#define	DMA_CTL_DMS(num)	(((num) & 0x3) << 23)/* Dst Master Select */
-	/* Src Burst Transaction Length */
-#define DMA_CTL_SRC_MSIZE(size) (((size) & 0x7) << 14)
-	/* Dst Burst Transaction Length */
-#define	DMA_CTL_DST_MSIZE(size) (((size) & 0x7) << 11)
-	/* Source Transfer Width */
-#define	DMA_CTL_SRC_TRWID(size) (((size) & 0x7) << 4)
-	/* Destination Transfer Width */
-#define	DMA_CTL_DST_TRWID(size) (((size) & 0x7) << 1)
-
-/* Assign HW handshaking interface (x) to destination / source peripheral */
-#define	DMA_CFG_HW_HS_DEST(int_num) (((int_num) & 0xF) << 11)
-#define	DMA_CFG_HW_HS_SRC(int_num) (((int_num) & 0xF) << 7)
-#define	DMA_CFG_HW_CH_PRIOR(int_num) (((int_num) & 0xF) << 5)
-#define	DMA_LLP_LMS(addr, master) (((addr) & 0xfffffffc) | (master))
-
-/*
- * This define is used to set block chaining disabled in the control low
- * register.  It is already in little endian format so it can be &'d dirctly.
- * It is essentially: cpu_to_le32(~(DMA_CTL_LLP_SRCEN | DMA_CTL_LLP_DSTEN))
- */
-enum {
-	DMA_CTL_LLP_DISABLE_LE32 = 0xffffffe7,
-	DMA_CTL_TTFC_P2M_DMAC =	0x00000002, /* Per to mem, DMAC cntr */
-	DMA_CTL_TTFC_M2P_PER =	0x00000003, /* Mem to per, peripheral cntr */
-	DMA_CTL_SINC_INC =	0x00000000, /* Source Address Increment */
-	DMA_CTL_SINC_DEC =	0x00000200,
-	DMA_CTL_SINC_NOCHANGE =	0x00000400,
-	DMA_CTL_DINC_INC =	0x00000000, /* Destination Address Increment */
-	DMA_CTL_DINC_DEC =	0x00000080,
-	DMA_CTL_DINC_NOCHANGE =	0x00000100,
-	DMA_CTL_INT_EN =	0x00000001, /* Interrupt Enable */
-
-/* Channel Configuration Register high bits */
-	DMA_CFG_FCMOD_REQ =	0x00000001, /* Flow Control - request based */
-	DMA_CFG_PROTCTL	=	(0x00000003 << 2),/* Protection Control */
-
-/* Channel Configuration Register low bits */
-	DMA_CFG_RELD_DST =	0x80000000, /* Reload Dest / Src Addr */
-	DMA_CFG_RELD_SRC =	0x40000000,
-	DMA_CFG_HS_SELSRC =	0x00000800, /* Software handshake Src/ Dest */
-	DMA_CFG_HS_SELDST =	0x00000400,
-	DMA_CFG_FIFOEMPTY =     (0x00000001 << 9), /* FIFO Empty bit */
-
-/* Channel Linked List Pointer Register */
-	DMA_LLP_AHBMASTER1 =	0,	/* List Master Select */
-	DMA_LLP_AHBMASTER2 =	1,
-
 	SATA_DWC_MAX_PORTS = 1,
 
 	SATA_DWC_SCR_OFFSET = 0x24,
@@ -287,7 +148,7 @@
 	struct ata_host		*host;
 	u8 __iomem		*reg_base;
 	struct sata_dwc_regs	*sata_dwc_regs;	/* DW Synopsys SATA specific */
-	int			irq_dma;
+	struct dw_dma_chip	*dma;
 };
 
 #define SATA_DWC_QCMD_MAX	32
@@ -295,10 +156,13 @@
 struct sata_dwc_device_port {
 	struct sata_dwc_device	*hsdev;
 	int			cmd_issued[SATA_DWC_QCMD_MAX];
-	struct lli		*llit[SATA_DWC_QCMD_MAX];  /* DMA LLI table */
-	dma_addr_t		llit_dma[SATA_DWC_QCMD_MAX];
-	u32			dma_chan[SATA_DWC_QCMD_MAX];
 	int			dma_pending[SATA_DWC_QCMD_MAX];
+
+	/* DMA info */
+	struct dw_dma_slave		*dws;
+	struct dma_chan			*chan;
+	struct dma_async_tx_descriptor	*desc[SATA_DWC_QCMD_MAX];
+	u32				dma_interrupt_count;
 };
 
 /*
@@ -330,14 +194,17 @@
 	void	__iomem	 *scr_addr_sstatus;
 	u32	sata_dwc_sactive_issued ;
 	u32	sata_dwc_sactive_queued ;
-	u32	dma_interrupt_count;
-	struct	ahb_dma_regs	*sata_dma_regs;
-	struct	device	*dwc_dev;
-	int	dma_channel;
 };
 
 static struct sata_dwc_host_priv host_pvt;
 
+static struct dw_dma_slave sata_dwc_dma_dws = {
+	.src_id = 0,
+	.dst_id = 0,
+	.src_master = 0,
+	.dst_master = 1,
+};
+
 /*
  * Prototypes
  */
@@ -347,12 +214,6 @@
 static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status);
 static void sata_dwc_port_stop(struct ata_port *ap);
 static void sata_dwc_clear_dmacr(struct sata_dwc_device_port *hsdevp, u8 tag);
-static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq);
-static void dma_dwc_exit(struct sata_dwc_device *hsdev);
-static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
-			      struct lli *lli, dma_addr_t dma_lli,
-			      void __iomem *addr, int dir);
-static void dma_dwc_xfer_start(int dma_ch);
 
 static const char *get_prot_descript(u8 protocol)
 {
@@ -390,90 +251,23 @@
 	}
 }
 
-static void sata_dwc_tf_dump(struct ata_taskfile *tf)
+static void sata_dwc_tf_dump(struct ata_port *ap, struct ata_taskfile *tf)
 {
-	dev_vdbg(host_pvt.dwc_dev, "taskfile cmd: 0x%02x protocol: %s flags:"
-		"0x%lx device: %x\n", tf->command,
-		get_prot_descript(tf->protocol), tf->flags, tf->device);
-	dev_vdbg(host_pvt.dwc_dev, "feature: 0x%02x nsect: 0x%x lbal: 0x%x "
-		"lbam: 0x%x lbah: 0x%x\n", tf->feature, tf->nsect, tf->lbal,
-		 tf->lbam, tf->lbah);
-	dev_vdbg(host_pvt.dwc_dev, "hob_feature: 0x%02x hob_nsect: 0x%x "
-		"hob_lbal: 0x%x hob_lbam: 0x%x hob_lbah: 0x%x\n",
+	dev_vdbg(ap->dev,
+		"taskfile cmd: 0x%02x protocol: %s flags: 0x%lx device: %x\n",
+		tf->command, get_prot_descript(tf->protocol), tf->flags,
+		tf->device);
+	dev_vdbg(ap->dev,
+		"feature: 0x%02x nsect: 0x%x lbal: 0x%x lbam: 0x%x lbah: 0x%x\n",
+		tf->feature, tf->nsect, tf->lbal, tf->lbam, tf->lbah);
+	dev_vdbg(ap->dev,
+		"hob_feature: 0x%02x hob_nsect: 0x%x hob_lbal: 0x%x hob_lbam: 0x%x hob_lbah: 0x%x\n",
 		tf->hob_feature, tf->hob_nsect, tf->hob_lbal, tf->hob_lbam,
 		tf->hob_lbah);
 }
 
-/*
- * Function: get_burst_length_encode
- * arguments: datalength: length in bytes of data
- * returns value to be programmed in register corresponding to data length
- * This value is effectively the log(base 2) of the length
- */
-static  int get_burst_length_encode(int datalength)
+static void dma_dwc_xfer_done(void *hsdev_instance)
 {
-	int items = datalength >> 2;	/* div by 4 to get lword count */
-
-	if (items >= 64)
-		return 5;
-
-	if (items >= 32)
-		return 4;
-
-	if (items >= 16)
-		return 3;
-
-	if (items >= 8)
-		return 2;
-
-	if (items >= 4)
-		return 1;
-
-	return 0;
-}
-
-static  void clear_chan_interrupts(int c)
-{
-	out_le32(&(host_pvt.sata_dma_regs->interrupt_clear.tfr.low),
-		 DMA_CHANNEL(c));
-	out_le32(&(host_pvt.sata_dma_regs->interrupt_clear.block.low),
-		 DMA_CHANNEL(c));
-	out_le32(&(host_pvt.sata_dma_regs->interrupt_clear.srctran.low),
-		 DMA_CHANNEL(c));
-	out_le32(&(host_pvt.sata_dma_regs->interrupt_clear.dsttran.low),
-		 DMA_CHANNEL(c));
-	out_le32(&(host_pvt.sata_dma_regs->interrupt_clear.error.low),
-		 DMA_CHANNEL(c));
-}
-
-/*
- * Function: dma_request_channel
- * arguments: None
- * returns channel number if available else -1
- * This function assigns the next available DMA channel from the list to the
- * requester
- */
-static int dma_request_channel(void)
-{
-	/* Check if the channel is not currently in use */
-	if (!(in_le32(&(host_pvt.sata_dma_regs->dma_chan_en.low)) &
-		DMA_CHANNEL(host_pvt.dma_channel)))
-		return host_pvt.dma_channel;
-	dev_err(host_pvt.dwc_dev, "%s Channel %d is currently in use\n",
-		__func__, host_pvt.dma_channel);
-	return -1;
-}
-
-/*
- * Function: dma_dwc_interrupt
- * arguments: irq, dev_id, pt_regs
- * returns channel number if available else -1
- * Interrupt Handler for DW AHB SATA DMA
- */
-static irqreturn_t dma_dwc_interrupt(int irq, void *hsdev_instance)
-{
-	int chan;
-	u32 tfr_reg, err_reg;
 	unsigned long flags;
 	struct sata_dwc_device *hsdev = hsdev_instance;
 	struct ata_host *host = (struct ata_host *)hsdev->host;
@@ -487,341 +281,65 @@
 	hsdevp = HSDEVP_FROM_AP(ap);
 	tag = ap->link.active_tag;
 
-	tfr_reg = in_le32(&(host_pvt.sata_dma_regs->interrupt_status.tfr\
-			.low));
-	err_reg = in_le32(&(host_pvt.sata_dma_regs->interrupt_status.error\
-			.low));
-
-	dev_dbg(ap->dev, "eot=0x%08x err=0x%08x pending=%d active port=%d\n",
-		tfr_reg, err_reg, hsdevp->dma_pending[tag], port);
-
-	chan = host_pvt.dma_channel;
-	if (chan >= 0) {
-		/* Check for end-of-transfer interrupt. */
-		if (tfr_reg & DMA_CHANNEL(chan)) {
-			/*
-			 * Each DMA command produces 2 interrupts.  Only
-			 * complete the command after both interrupts have been
-			 * seen. (See sata_dwc_isr())
-			 */
-			host_pvt.dma_interrupt_count++;
-			sata_dwc_clear_dmacr(hsdevp, tag);
-
-			if (hsdevp->dma_pending[tag] ==
-			    SATA_DWC_DMA_PENDING_NONE) {
-				dev_err(ap->dev, "DMA not pending eot=0x%08x "
-					"err=0x%08x tag=0x%02x pending=%d\n",
-					tfr_reg, err_reg, tag,
-					hsdevp->dma_pending[tag]);
-			}
-
-			if ((host_pvt.dma_interrupt_count % 2) == 0)
-				sata_dwc_dma_xfer_complete(ap, 1);
-
-			/* Clear the interrupt */
-			out_le32(&(host_pvt.sata_dma_regs->interrupt_clear\
-				.tfr.low),
-				 DMA_CHANNEL(chan));
-		}
-
-		/* Check for error interrupt. */
-		if (err_reg & DMA_CHANNEL(chan)) {
-			/* TODO Need error handler ! */
-			dev_err(ap->dev, "error interrupt err_reg=0x%08x\n",
-				err_reg);
-
-			/* Clear the interrupt. */
-			out_le32(&(host_pvt.sata_dma_regs->interrupt_clear\
-				.error.low),
-				 DMA_CHANNEL(chan));
-		}
-	}
-	spin_unlock_irqrestore(&host->lock, flags);
-	return IRQ_HANDLED;
-}
-
-/*
- * Function: dma_request_interrupts
- * arguments: hsdev
- * returns status
- * This function registers ISR for a particular DMA channel interrupt
- */
-static int dma_request_interrupts(struct sata_dwc_device *hsdev, int irq)
-{
-	int retval = 0;
-	int chan = host_pvt.dma_channel;
-
-	if (chan >= 0) {
-		/* Unmask error interrupt */
-		out_le32(&(host_pvt.sata_dma_regs)->interrupt_mask.error.low,
-			 DMA_ENABLE_CHAN(chan));
-
-		/* Unmask end-of-transfer interrupt */
-		out_le32(&(host_pvt.sata_dma_regs)->interrupt_mask.tfr.low,
-			 DMA_ENABLE_CHAN(chan));
-	}
-
-	retval = request_irq(irq, dma_dwc_interrupt, 0, "SATA DMA", hsdev);
-	if (retval) {
-		dev_err(host_pvt.dwc_dev, "%s: could not get IRQ %d\n",
-		__func__, irq);
-		return -ENODEV;
-	}
-
-	/* Mark this interrupt as requested */
-	hsdev->irq_dma = irq;
-	return 0;
-}
-
-/*
- * Function: map_sg_to_lli
- * The Synopsis driver has a comment proposing that better performance
- * is possible by only enabling interrupts on the last item in the linked list.
- * However, it seems that could be a problem if an error happened on one of the
- * first items.  The transfer would halt, but no error interrupt would occur.
- * Currently this function sets interrupts enabled for each linked list item:
- * DMA_CTL_INT_EN.
- */
-static int map_sg_to_lli(struct scatterlist *sg, int num_elems,
-			struct lli *lli, dma_addr_t dma_lli,
-			void __iomem *dmadr_addr, int dir)
-{
-	int i, idx = 0;
-	int fis_len = 0;
-	dma_addr_t next_llp;
-	int bl;
-	int sms_val, dms_val;
-
-	sms_val = 0;
-	dms_val = 1 + host_pvt.dma_channel;
-	dev_dbg(host_pvt.dwc_dev,
-		"%s: sg=%p nelem=%d lli=%p dma_lli=0x%pad dmadr=0x%p\n",
-		__func__, sg, num_elems, lli, &dma_lli, dmadr_addr);
-
-	bl = get_burst_length_encode(AHB_DMA_BRST_DFLT);
-
-	for (i = 0; i < num_elems; i++, sg++) {
-		u32 addr, offset;
-		u32 sg_len, len;
-
-		addr = (u32) sg_dma_address(sg);
-		sg_len = sg_dma_len(sg);
-
-		dev_dbg(host_pvt.dwc_dev, "%s: elem=%d sg_addr=0x%x sg_len"
-			"=%d\n", __func__, i, addr, sg_len);
-
-		while (sg_len) {
-			if (idx >= SATA_DWC_DMAC_LLI_NUM) {
-				/* The LLI table is not large enough. */
-				dev_err(host_pvt.dwc_dev, "LLI table overrun "
-				"(idx=%d)\n", idx);
-				break;
-			}
-			len = (sg_len > SATA_DWC_DMAC_CTRL_TSIZE_MAX) ?
-				SATA_DWC_DMAC_CTRL_TSIZE_MAX : sg_len;
-
-			offset = addr & 0xffff;
-			if ((offset + sg_len) > 0x10000)
-				len = 0x10000 - offset;
-
-			/*
-			 * Make sure a LLI block is not created that will span
-			 * 8K max FIS boundary.  If the block spans such a FIS
-			 * boundary, there is a chance that a DMA burst will
-			 * cross that boundary -- this results in an error in
-			 * the host controller.
-			 */
-			if (fis_len + len > 8192) {
-				dev_dbg(host_pvt.dwc_dev, "SPLITTING: fis_len="
-					"%d(0x%x) len=%d(0x%x)\n", fis_len,
-					 fis_len, len, len);
-				len = 8192 - fis_len;
-				fis_len = 0;
-			} else {
-				fis_len += len;
-			}
-			if (fis_len == 8192)
-				fis_len = 0;
-
-			/*
-			 * Set DMA addresses and lower half of control register
-			 * based on direction.
-			 */
-			if (dir == DMA_FROM_DEVICE) {
-				lli[idx].dar = cpu_to_le32(addr);
-				lli[idx].sar = cpu_to_le32((u32)dmadr_addr);
-
-				lli[idx].ctl.low = cpu_to_le32(
-					DMA_CTL_TTFC(DMA_CTL_TTFC_P2M_DMAC) |
-					DMA_CTL_SMS(sms_val) |
-					DMA_CTL_DMS(dms_val) |
-					DMA_CTL_SRC_MSIZE(bl) |
-					DMA_CTL_DST_MSIZE(bl) |
-					DMA_CTL_SINC_NOCHANGE |
-					DMA_CTL_SRC_TRWID(2) |
-					DMA_CTL_DST_TRWID(2) |
-					DMA_CTL_INT_EN |
-					DMA_CTL_LLP_SRCEN |
-					DMA_CTL_LLP_DSTEN);
-			} else {	/* DMA_TO_DEVICE */
-				lli[idx].sar = cpu_to_le32(addr);
-				lli[idx].dar = cpu_to_le32((u32)dmadr_addr);
-
-				lli[idx].ctl.low = cpu_to_le32(
-					DMA_CTL_TTFC(DMA_CTL_TTFC_M2P_PER) |
-					DMA_CTL_SMS(dms_val) |
-					DMA_CTL_DMS(sms_val) |
-					DMA_CTL_SRC_MSIZE(bl) |
-					DMA_CTL_DST_MSIZE(bl) |
-					DMA_CTL_DINC_NOCHANGE |
-					DMA_CTL_SRC_TRWID(2) |
-					DMA_CTL_DST_TRWID(2) |
-					DMA_CTL_INT_EN |
-					DMA_CTL_LLP_SRCEN |
-					DMA_CTL_LLP_DSTEN);
-			}
-
-			dev_dbg(host_pvt.dwc_dev, "%s setting ctl.high len: "
-				"0x%08x val: 0x%08x\n", __func__,
-				len, DMA_CTL_BLK_TS(len / 4));
-
-			/* Program the LLI CTL high register */
-			lli[idx].ctl.high = cpu_to_le32(DMA_CTL_BLK_TS\
-						(len / 4));
-
-			/* Program the next pointer.  The next pointer must be
-			 * the physical address, not the virtual address.
-			 */
-			next_llp = (dma_lli + ((idx + 1) * sizeof(struct \
-							lli)));
-
-			/* The last 2 bits encode the list master select. */
-			next_llp = DMA_LLP_LMS(next_llp, DMA_LLP_AHBMASTER2);
-
-			lli[idx].llp = cpu_to_le32(next_llp);
-			idx++;
-			sg_len -= len;
-			addr += len;
-		}
-	}
-
 	/*
-	 * The last next ptr has to be zero and the last control low register
-	 * has to have LLP_SRC_EN and LLP_DST_EN (linked list pointer source
-	 * and destination enable) set back to 0 (disabled.) This is what tells
-	 * the core that this is the last item in the linked list.
+	 * Each DMA command produces 2 interrupts.  Only
+	 * complete the command after both interrupts have been
+	 * seen. (See sata_dwc_isr())
 	 */
-	if (idx) {
-		lli[idx-1].llp = 0x00000000;
-		lli[idx-1].ctl.low &= DMA_CTL_LLP_DISABLE_LE32;
+	hsdevp->dma_interrupt_count++;
+	sata_dwc_clear_dmacr(hsdevp, tag);
 
-		/* Flush cache to memory */
-		dma_cache_sync(NULL, lli, (sizeof(struct lli) * idx),
-			       DMA_BIDIRECTIONAL);
+	if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_NONE) {
+		dev_err(ap->dev, "DMA not pending tag=0x%02x pending=%d\n",
+			tag, hsdevp->dma_pending[tag]);
 	}
 
-	return idx;
+	if ((hsdevp->dma_interrupt_count % 2) == 0)
+		sata_dwc_dma_xfer_complete(ap, 1);
+
+	spin_unlock_irqrestore(&host->lock, flags);
 }
 
-/*
- * Function: dma_dwc_xfer_start
- * arguments: Channel number
- * Return : None
- * Enables the DMA channel
- */
-static void dma_dwc_xfer_start(int dma_ch)
+static struct dma_async_tx_descriptor *dma_dwc_xfer_setup(struct ata_queued_cmd *qc)
 {
-	/* Enable the DMA channel */
-	out_le32(&(host_pvt.sata_dma_regs->dma_chan_en.low),
-		 in_le32(&(host_pvt.sata_dma_regs->dma_chan_en.low)) |
-		 DMA_ENABLE_CHAN(dma_ch));
-}
+	struct ata_port *ap = qc->ap;
+	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
+	struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
+	dma_addr_t addr = (dma_addr_t)&hsdev->sata_dwc_regs->dmadr;
+	struct dma_slave_config sconf;
+	struct dma_async_tx_descriptor *desc;
 
-static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
-			      struct lli *lli, dma_addr_t dma_lli,
-			      void __iomem *addr, int dir)
-{
-	int dma_ch;
-	int num_lli;
-	/* Acquire DMA channel */
-	dma_ch = dma_request_channel();
-	if (dma_ch == -1) {
-		dev_err(host_pvt.dwc_dev, "%s: dma channel unavailable\n",
-			 __func__);
-		return -EAGAIN;
+	if (qc->dma_dir == DMA_DEV_TO_MEM) {
+		sconf.src_addr = addr;
+		sconf.device_fc = true;
+	} else {	/* DMA_MEM_TO_DEV */
+		sconf.dst_addr = addr;
+		sconf.device_fc = false;
 	}
 
+	sconf.direction = qc->dma_dir;
+	sconf.src_maxburst = AHB_DMA_BRST_DFLT;
+	sconf.dst_maxburst = AHB_DMA_BRST_DFLT;
+	sconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	sconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	dmaengine_slave_config(hsdevp->chan, &sconf);
+
 	/* Convert SG list to linked list of items (LLIs) for AHB DMA */
-	num_lli = map_sg_to_lli(sg, num_elems, lli, dma_lli, addr, dir);
+	desc = dmaengine_prep_slave_sg(hsdevp->chan, qc->sg, qc->n_elem,
+				       qc->dma_dir,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
-	dev_dbg(host_pvt.dwc_dev, "%s sg: 0x%p, count: %d lli: %p dma_lli:"
-		" 0x%0xlx addr: %p lli count: %d\n", __func__, sg, num_elems,
-		 lli, (u32)dma_lli, addr, num_lli);
+	if (!desc)
+		return NULL;
 
-	clear_chan_interrupts(dma_ch);
+	desc->callback = dma_dwc_xfer_done;
+	desc->callback_param = hsdev;
 
-	/* Program the CFG register. */
-	out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].cfg.high),
-		 DMA_CFG_HW_HS_SRC(dma_ch) | DMA_CFG_HW_HS_DEST(dma_ch) |
-		 DMA_CFG_PROTCTL | DMA_CFG_FCMOD_REQ);
-	out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].cfg.low),
-		 DMA_CFG_HW_CH_PRIOR(dma_ch));
+	dev_dbg(hsdev->dev, "%s sg: 0x%p, count: %d addr: %pad\n",
+		__func__, qc->sg, qc->n_elem, &addr);
 
-	/* Program the address of the linked list */
-	out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].llp.low),
-		 DMA_LLP_LMS(dma_lli, DMA_LLP_AHBMASTER2));
-
-	/* Program the CTL register with src enable / dst enable */
-	out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].ctl.low),
-		 DMA_CTL_LLP_SRCEN | DMA_CTL_LLP_DSTEN);
-	return dma_ch;
-}
-
-/*
- * Function: dma_dwc_exit
- * arguments: None
- * returns status
- * This function exits the SATA DMA driver
- */
-static void dma_dwc_exit(struct sata_dwc_device *hsdev)
-{
-	dev_dbg(host_pvt.dwc_dev, "%s:\n", __func__);
-	if (host_pvt.sata_dma_regs) {
-		iounmap((void __iomem *)host_pvt.sata_dma_regs);
-		host_pvt.sata_dma_regs = NULL;
-	}
-
-	if (hsdev->irq_dma) {
-		free_irq(hsdev->irq_dma, hsdev);
-		hsdev->irq_dma = 0;
-	}
-}
-
-/*
- * Function: dma_dwc_init
- * arguments: hsdev
- * returns status
- * This function initializes the SATA DMA driver
- */
-static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq)
-{
-	int err;
-
-	err = dma_request_interrupts(hsdev, irq);
-	if (err) {
-		dev_err(host_pvt.dwc_dev, "%s: dma_request_interrupts returns"
-			" %d\n", __func__, err);
-		return err;
-	}
-
-	/* Enabe DMA */
-	out_le32(&(host_pvt.sata_dma_regs->dma_cfg.low), DMA_EN);
-
-	dev_notice(host_pvt.dwc_dev, "DMA initialized\n");
-	dev_dbg(host_pvt.dwc_dev, "SATA DMA registers=0x%p\n", host_pvt.\
-		sata_dma_regs);
-
-	return 0;
+	return desc;
 }
 
 static int sata_dwc_scr_read(struct ata_link *link, unsigned int scr, u32 *val)
@@ -891,21 +409,18 @@
 	struct ata_queued_cmd *qc;
 	u32 serror;
 	u8 status, tag;
-	u32 err_reg;
 
 	ata_ehi_clear_desc(ehi);
 
 	serror = core_scr_read(SCR_ERROR);
 	status = ap->ops->sff_check_status(ap);
 
-	err_reg = in_le32(&(host_pvt.sata_dma_regs->interrupt_status.error.\
-			low));
 	tag = ap->link.active_tag;
 
-	dev_err(ap->dev, "%s SCR_ERROR=0x%08x intpr=0x%08x status=0x%08x "
-		"dma_intp=%d pending=%d issued=%d dma_err_status=0x%08x\n",
-		__func__, serror, intpr, status, host_pvt.dma_interrupt_count,
-		hsdevp->dma_pending[tag], hsdevp->cmd_issued[tag], err_reg);
+	dev_err(ap->dev,
+		"%s SCR_ERROR=0x%08x intpr=0x%08x status=0x%08x dma_intp=%d pending=%d issued=%d",
+		__func__, serror, intpr, status, hsdevp->dma_interrupt_count,
+		hsdevp->dma_pending[tag], hsdevp->cmd_issued[tag]);
 
 	/* Clear error register and interrupt bit */
 	clear_serror();
@@ -1003,8 +518,9 @@
 
 		/* DEV interrupt w/ no active qc? */
 		if (unlikely(!qc || (qc->tf.flags & ATA_TFLAG_POLLING))) {
-			dev_err(ap->dev, "%s interrupt with no active qc "
-				"qc=%p\n", __func__, qc);
+			dev_err(ap->dev,
+				"%s interrupt with no active qc qc=%p\n",
+				__func__, qc);
 			ap->ops->sff_check_status(ap);
 			handled = 1;
 			goto DONE;
@@ -1031,16 +547,16 @@
 			 * operation done interrupt. The command should be
 			 * completed only after both interrupts are seen.
 			 */
-			host_pvt.dma_interrupt_count++;
+			hsdevp->dma_interrupt_count++;
 			if (hsdevp->dma_pending[tag] == \
 					SATA_DWC_DMA_PENDING_NONE) {
-				dev_err(ap->dev, "%s: DMA not pending "
-					"intpr=0x%08x status=0x%08x pending"
-					"=%d\n", __func__, intpr, status,
+				dev_err(ap->dev,
+					"%s: DMA not pending intpr=0x%08x status=0x%08x pending=%d\n",
+					__func__, intpr, status,
 					hsdevp->dma_pending[tag]);
 			}
 
-			if ((host_pvt.dma_interrupt_count % 2) == 0)
+			if ((hsdevp->dma_interrupt_count % 2) == 0)
 				sata_dwc_dma_xfer_complete(ap, 1);
 		} else if (ata_is_pio(qc->tf.protocol)) {
 			ata_sff_hsm_move(ap, qc, status, 0);
@@ -1068,17 +584,17 @@
 
 	if (sactive != 0 || (host_pvt.sata_dwc_sactive_issued) > 1 || \
 							tag_mask > 1) {
-		dev_dbg(ap->dev, "%s NCQ:sactive=0x%08x  sactive_issued=0x%08x"
-			"tag_mask=0x%08x\n", __func__, sactive,
-			host_pvt.sata_dwc_sactive_issued, tag_mask);
+		dev_dbg(ap->dev,
+			"%s NCQ:sactive=0x%08x  sactive_issued=0x%08x tag_mask=0x%08x\n",
+			__func__, sactive, host_pvt.sata_dwc_sactive_issued,
+			tag_mask);
 	}
 
 	if ((tag_mask | (host_pvt.sata_dwc_sactive_issued)) != \
 					(host_pvt.sata_dwc_sactive_issued)) {
-		dev_warn(ap->dev, "Bad tag mask?  sactive=0x%08x "
-			 "(host_pvt.sata_dwc_sactive_issued)=0x%08x  tag_mask"
-			 "=0x%08x\n", sactive, host_pvt.sata_dwc_sactive_issued,
-			  tag_mask);
+		dev_warn(ap->dev,
+			 "Bad tag mask?  sactive=0x%08x (host_pvt.sata_dwc_sactive_issued)=0x%08x  tag_mask=0x%08x\n",
+			 sactive, host_pvt.sata_dwc_sactive_issued, tag_mask);
 	}
 
 	/* read just to clear ... not bad if currently still busy */
@@ -1114,12 +630,12 @@
 		dev_dbg(ap->dev, "%s NCQ command, protocol: %s\n", __func__,
 			get_prot_descript(qc->tf.protocol));
 		if (ata_is_dma(qc->tf.protocol)) {
-			host_pvt.dma_interrupt_count++;
+			hsdevp->dma_interrupt_count++;
 			if (hsdevp->dma_pending[tag] == \
 					SATA_DWC_DMA_PENDING_NONE)
 				dev_warn(ap->dev, "%s: DMA not pending?\n",
 					__func__);
-			if ((host_pvt.dma_interrupt_count % 2) == 0)
+			if ((hsdevp->dma_interrupt_count % 2) == 0)
 				sata_dwc_dma_xfer_complete(ap, 1);
 		} else {
 			if (unlikely(sata_dwc_qc_complete(ap, qc, 1)))
@@ -1142,8 +658,9 @@
 	 */
 	sactive2 = core_scr_read(SCR_ACTIVE);
 	if (sactive2 != sactive) {
-		dev_dbg(ap->dev, "More completed - sactive=0x%x sactive2"
-			"=0x%x\n", sactive, sactive2);
+		dev_dbg(ap->dev,
+			"More completed - sactive=0x%x sactive2=0x%x\n",
+			sactive, sactive2);
 	}
 	handled = 1;
 
@@ -1169,11 +686,10 @@
 		 * This should not happen, it indicates the driver is out of
 		 * sync.  If it does happen, clear dmacr anyway.
 		 */
-		dev_err(host_pvt.dwc_dev, "%s DMA protocol RX and"
-			"TX DMA not pending tag=0x%02x pending=%d"
-			" dmacr: 0x%08x\n", __func__, tag,
-			hsdevp->dma_pending[tag],
-			in_le32(&(hsdev->sata_dwc_regs->dmacr)));
+		dev_err(hsdev->dev,
+			"%s DMA protocol RX and TX DMA not pending tag=0x%02x pending=%d dmacr: 0x%08x\n",
+			__func__, tag, hsdevp->dma_pending[tag],
+			in_le32(&hsdev->sata_dwc_regs->dmacr));
 		out_le32(&(hsdev->sata_dwc_regs->dmacr),
 			SATA_DWC_DMACR_TXRXCH_CLEAR);
 	}
@@ -1195,8 +711,9 @@
 
 #ifdef DEBUG_NCQ
 	if (tag > 0) {
-		dev_info(ap->dev, "%s tag=%u cmd=0x%02x dma dir=%s proto=%s "
-			 "dmacr=0x%08x\n", __func__, qc->tag, qc->tf.command,
+		dev_info(ap->dev,
+			 "%s tag=%u cmd=0x%02x dma dir=%s proto=%s dmacr=0x%08x\n",
+			 __func__, qc->tag, qc->tf.command,
 			 get_dma_dir_descript(qc->dma_dir),
 			 get_prot_descript(qc->tf.protocol),
 			 in_le32(&(hsdev->sata_dwc_regs->dmacr)));
@@ -1205,8 +722,9 @@
 
 	if (ata_is_dma(qc->tf.protocol)) {
 		if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_NONE) {
-			dev_err(ap->dev, "%s DMA protocol RX and TX DMA not "
-				"pending dmacr: 0x%08x\n", __func__,
+			dev_err(ap->dev,
+				"%s DMA protocol RX and TX DMA not pending dmacr: 0x%08x\n",
+				__func__,
 				in_le32(&(hsdev->sata_dwc_regs->dmacr)));
 		}
 
@@ -1232,9 +750,9 @@
 		dev_err(ap->dev, "TX DMA PENDING\n");
 	else if (hsdevp->dma_pending[tag] == SATA_DWC_DMA_PENDING_RX)
 		dev_err(ap->dev, "RX DMA PENDING\n");
-	dev_dbg(ap->dev, "QC complete cmd=0x%02x status=0x%02x ata%u:"
-		" protocol=%d\n", qc->tf.command, status, ap->print_id,
-		 qc->tf.protocol);
+	dev_dbg(ap->dev,
+		"QC complete cmd=0x%02x status=0x%02x ata%u: protocol=%d\n",
+		qc->tf.command, status, ap->print_id, qc->tf.protocol);
 
 	/* clear active bit */
 	mask = (~(qcmd_tag_to_mask(tag)));
@@ -1260,11 +778,23 @@
 	 */
 	out_le32(&hsdev->sata_dwc_regs->errmr, SATA_DWC_SERROR_ERR_BITS);
 
-	dev_dbg(host_pvt.dwc_dev, "%s: INTMR = 0x%08x, ERRMR = 0x%08x\n",
+	dev_dbg(hsdev->dev, "%s: INTMR = 0x%08x, ERRMR = 0x%08x\n",
 		 __func__, in_le32(&hsdev->sata_dwc_regs->intmr),
 		in_le32(&hsdev->sata_dwc_regs->errmr));
 }
 
+static bool sata_dwc_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct sata_dwc_device_port *hsdevp = param;
+	struct dw_dma_slave *dws = hsdevp->dws;
+
+	if (dws->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = dws;
+	return true;
+}
+
 static void sata_dwc_setup_port(struct ata_ioports *port, unsigned long base)
 {
 	port->cmd_addr = (void __iomem *)base + 0x00;
@@ -1299,6 +829,7 @@
 	struct sata_dwc_device *hsdev;
 	struct sata_dwc_device_port *hsdevp = NULL;
 	struct device *pdev;
+	dma_cap_mask_t mask;
 	int i;
 
 	hsdev = HSDEV_FROM_AP(ap);
@@ -1322,29 +853,27 @@
 	}
 	hsdevp->hsdev = hsdev;
 
+	hsdevp->dws = &sata_dwc_dma_dws;
+	hsdevp->dws->dma_dev = hsdev->dev;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* Acquire DMA channel */
+	hsdevp->chan = dma_request_channel(mask, sata_dwc_dma_filter, hsdevp);
+	if (!hsdevp->chan) {
+		dev_err(hsdev->dev, "%s: dma channel unavailable\n",
+			 __func__);
+		err = -EAGAIN;
+		goto CLEANUP_ALLOC;
+	}
+
 	for (i = 0; i < SATA_DWC_QCMD_MAX; i++)
 		hsdevp->cmd_issued[i] = SATA_DWC_CMD_ISSUED_NOT;
 
 	ap->bmdma_prd = NULL;	/* set these so libata doesn't use them */
 	ap->bmdma_prd_dma = 0;
 
-	/*
-	 * DMA - Assign scatter gather LLI table. We can't use the libata
-	 * version since it's PRD is IDE PCI specific.
-	 */
-	for (i = 0; i < SATA_DWC_QCMD_MAX; i++) {
-		hsdevp->llit[i] = dma_alloc_coherent(pdev,
-						     SATA_DWC_DMAC_LLI_TBL_SZ,
-						     &(hsdevp->llit_dma[i]),
-						     GFP_ATOMIC);
-		if (!hsdevp->llit[i]) {
-			dev_err(ap->dev, "%s: dma_alloc_coherent failed\n",
-				 __func__);
-			err = -ENOMEM;
-			goto CLEANUP_ALLOC;
-		}
-	}
-
 	if (ap->port_no == 0)  {
 		dev_dbg(ap->dev, "%s: clearing TXCHEN, RXCHEN in DMAC\n",
 			__func__);
@@ -1373,22 +902,14 @@
 
 static void sata_dwc_port_stop(struct ata_port *ap)
 {
-	int i;
-	struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
 	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
 
 	dev_dbg(ap->dev, "%s: ap->id = %d\n", __func__, ap->print_id);
 
-	if (hsdevp && hsdev) {
-		/* deallocate LLI table */
-		for (i = 0; i < SATA_DWC_QCMD_MAX; i++) {
-			dma_free_coherent(ap->host->dev,
-					  SATA_DWC_DMAC_LLI_TBL_SZ,
-					 hsdevp->llit[i], hsdevp->llit_dma[i]);
-		}
+	dmaengine_terminate_all(hsdevp->chan);
+	dma_release_channel(hsdevp->chan);
 
-		kfree(hsdevp);
-	}
+	kfree(hsdevp);
 	ap->private_data = NULL;
 }
 
@@ -1444,12 +965,12 @@
 static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag)
 {
 	int start_dma;
-	u32 reg, dma_chan;
+	u32 reg;
 	struct sata_dwc_device *hsdev = HSDEV_FROM_QC(qc);
 	struct ata_port *ap = qc->ap;
 	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
+	struct dma_async_tx_descriptor *desc = hsdevp->desc[tag];
 	int dir = qc->dma_dir;
-	dma_chan = hsdevp->dma_chan[tag];
 
 	if (hsdevp->cmd_issued[tag] != SATA_DWC_CMD_ISSUED_NOT) {
 		start_dma = 1;
@@ -1458,16 +979,17 @@
 		else
 			hsdevp->dma_pending[tag] = SATA_DWC_DMA_PENDING_RX;
 	} else {
-		dev_err(ap->dev, "%s: Command not pending cmd_issued=%d "
-			"(tag=%d) DMA NOT started\n", __func__,
-			hsdevp->cmd_issued[tag], tag);
+		dev_err(ap->dev,
+			"%s: Command not pending cmd_issued=%d (tag=%d) DMA NOT started\n",
+			__func__, hsdevp->cmd_issued[tag], tag);
 		start_dma = 0;
 	}
 
-	dev_dbg(ap->dev, "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s "
-		"start_dma? %x\n", __func__, qc, tag, qc->tf.command,
+	dev_dbg(ap->dev,
+		"%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s start_dma? %x\n",
+		__func__, qc, tag, qc->tf.command,
 		get_dma_dir_descript(qc->dma_dir), start_dma);
-	sata_dwc_tf_dump(&(qc->tf));
+	sata_dwc_tf_dump(ap, &qc->tf);
 
 	if (start_dma) {
 		reg = core_scr_read(SCR_ERROR);
@@ -1484,7 +1006,8 @@
 				SATA_DWC_DMACR_RXCHEN);
 
 		/* Enable AHB DMA transfer on the specified channel */
-		dma_dwc_xfer_start(dma_chan);
+		dmaengine_submit(desc);
+		dma_async_issue_pending(hsdevp->chan);
 	}
 }
 
@@ -1510,26 +1033,21 @@
  */
 static void sata_dwc_qc_prep_by_tag(struct ata_queued_cmd *qc, u8 tag)
 {
-	struct scatterlist *sg = qc->sg;
+	struct dma_async_tx_descriptor *desc;
 	struct ata_port *ap = qc->ap;
-	int dma_chan;
-	struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
 	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
 
 	dev_dbg(ap->dev, "%s: port=%d dma dir=%s n_elem=%d\n",
 		__func__, ap->port_no, get_dma_dir_descript(qc->dma_dir),
 		 qc->n_elem);
 
-	dma_chan = dma_dwc_xfer_setup(sg, qc->n_elem, hsdevp->llit[tag],
-				      hsdevp->llit_dma[tag],
-				      (void __iomem *)&hsdev->sata_dwc_regs->dmadr,
-				      qc->dma_dir);
-	if (dma_chan < 0) {
-		dev_err(ap->dev, "%s: dma_dwc_xfer_setup returns err %d\n",
-			__func__, dma_chan);
+	desc = dma_dwc_xfer_setup(qc);
+	if (!desc) {
+		dev_err(ap->dev, "%s: dma_dwc_xfer_setup returns NULL\n",
+			__func__);
 		return;
 	}
-	hsdevp->dma_chan[tag] = dma_chan;
+	hsdevp->desc[tag] = desc;
 }
 
 static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
@@ -1540,8 +1058,8 @@
 
 #ifdef DEBUG_NCQ
 	if (qc->tag > 0 || ap->link.sactive > 1)
-		dev_info(ap->dev, "%s ap id=%d cmd(0x%02x)=%s qc tag=%d "
-			 "prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
+		dev_info(ap->dev,
+			 "%s ap id=%d cmd(0x%02x)=%s qc tag=%d prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
 			 __func__, ap->print_id, qc->tf.command,
 			 ata_get_cmd_descript(qc->tf.command),
 			 qc->tag, get_prot_descript(qc->tf.protocol),
@@ -1557,9 +1075,9 @@
 		sactive |= (0x00000001 << tag);
 		core_scr_write(SCR_ACTIVE, sactive);
 
-		dev_dbg(qc->ap->dev, "%s: tag=%d ap->link.sactive = 0x%08x "
-			"sactive=0x%08x\n", __func__, tag, qc->ap->link.sactive,
-			sactive);
+		dev_dbg(qc->ap->dev,
+			"%s: tag=%d ap->link.sactive = 0x%08x sactive=0x%08x\n",
+			__func__, tag, qc->ap->link.sactive, sactive);
 
 		ap->ops->sff_tf_load(ap, &qc->tf);
 		sata_dwc_exec_command_by_tag(ap, &qc->tf, qc->tag,
@@ -1673,7 +1191,6 @@
 	struct ata_port_info pi = sata_dwc_port_info[0];
 	const struct ata_port_info *ppi[] = { &pi, NULL };
 	struct device_node *np = ofdev->dev.of_node;
-	u32 dma_chan;
 
 	/* Allocate DWC SATA device */
 	host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_DWC_MAX_PORTS);
@@ -1683,18 +1200,11 @@
 
 	host->private_data = hsdev;
 
-	if (of_property_read_u32(np, "dma-channel", &dma_chan)) {
-		dev_warn(&ofdev->dev, "no dma-channel property set."
-			 " Use channel 0\n");
-		dma_chan = 0;
-	}
-	host_pvt.dma_channel = dma_chan;
-
 	/* Ioremap SATA registers */
 	base = of_iomap(np, 0);
 	if (!base) {
-		dev_err(&ofdev->dev, "ioremap failed for SATA register"
-			" address\n");
+		dev_err(&ofdev->dev,
+			"ioremap failed for SATA register address\n");
 		return -ENODEV;
 	}
 	hsdev->reg_base = base;
@@ -1716,27 +1226,29 @@
 		   idr, ver[0], ver[1], ver[2]);
 
 	/* Get SATA DMA interrupt number */
-	irq = irq_of_parse_and_map(np, 1);
-	if (irq == NO_IRQ) {
+	hsdev->dma->irq = irq_of_parse_and_map(np, 1);
+	if (hsdev->dma->irq == NO_IRQ) {
 		dev_err(&ofdev->dev, "no SATA DMA irq\n");
 		err = -ENODEV;
 		goto error_iomap;
 	}
 
 	/* Get physical SATA DMA register base address */
-	host_pvt.sata_dma_regs = (void *)of_iomap(np, 1);
-	if (!(host_pvt.sata_dma_regs)) {
-		dev_err(&ofdev->dev, "ioremap failed for AHBDMA register"
-			" address\n");
+	hsdev->dma->regs = of_iomap(np, 1);
+	if (!hsdev->dma->regs) {
+		dev_err(&ofdev->dev,
+			"ioremap failed for AHBDMA register address\n");
 		err = -ENODEV;
 		goto error_iomap;
 	}
 
 	/* Save dev for later use in dev_xxx() routines */
-	host_pvt.dwc_dev = &ofdev->dev;
+	hsdev->dev = &ofdev->dev;
+
+	hsdev->dma->dev = &ofdev->dev;
 
 	/* Initialize AHB DMAC */
-	err = dma_dwc_init(hsdev, irq);
+	err = dw_dma_probe(hsdev->dma, NULL);
 	if (err)
 		goto error_dma_iomap;
 
@@ -1765,9 +1277,9 @@
 
 error_out:
 	/* Free SATA DMA resources */
-	dma_dwc_exit(hsdev);
+	dw_dma_remove(hsdev->dma);
 error_dma_iomap:
-	iounmap((void __iomem *)host_pvt.sata_dma_regs);
+	iounmap(hsdev->dma->regs);
 error_iomap:
 	iounmap(base);
 	return err;
@@ -1782,9 +1294,9 @@
 	ata_host_detach(host);
 
 	/* Free SATA DMA resources */
-	dma_dwc_exit(hsdev);
+	dw_dma_remove(hsdev->dma);
 
-	iounmap((void __iomem *)host_pvt.sata_dma_regs);
+	iounmap(hsdev->dma->regs);
 	iounmap(hsdev->reg_base);
 	dev_dbg(&ofdev->dev, "done\n");
 	return 0;
@@ -1809,5 +1321,5 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mark Miesfeld <mmiesfeld@amcc.com>");
-MODULE_DESCRIPTION("DesignWare Cores SATA controller low lever driver");
+MODULE_DESCRIPTION("DesignWare Cores SATA controller low level driver");
 MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 0698278..e81a821 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -856,13 +856,13 @@
 	}
 
 	/* Set dma_mask.  This devices doesn't support 64bit addressing. */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc) {
 		dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 		return rc;
 	}
 
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc) {
 		dev_err(&pdev->dev, "32-bit consistent DMA enable failed\n");
 		return rc;
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index f8c33e3..bd74ee5 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -306,6 +306,11 @@
 	MV5_PHY_CTL		= 0x0C,
 	SATA_IFCFG		= 0x050,
 	LP_PHY_CTL		= 0x058,
+	LP_PHY_CTL_PIN_PU_PLL   = (1 << 0),
+	LP_PHY_CTL_PIN_PU_RX    = (1 << 1),
+	LP_PHY_CTL_PIN_PU_TX    = (1 << 2),
+	LP_PHY_CTL_GEN_TX_3G    = (1 << 5),
+	LP_PHY_CTL_GEN_RX_3G    = (1 << 9),
 
 	MV_M2_PREAMP_MASK	= 0x7e0,
 
@@ -1391,10 +1396,17 @@
 				/*
 				 * Set PHY speed according to SControl speed.
 				 */
-				if ((val & 0xf0) == 0x10)
-					writelfl(0x7, lp_phy_addr);
-				else
-					writelfl(0x227, lp_phy_addr);
+				u32 lp_phy_val =
+					LP_PHY_CTL_PIN_PU_PLL |
+					LP_PHY_CTL_PIN_PU_RX  |
+					LP_PHY_CTL_PIN_PU_TX;
+
+				if ((val & 0xf0) != 0x10)
+					lp_phy_val |=
+						LP_PHY_CTL_GEN_TX_3G |
+						LP_PHY_CTL_GEN_RX_3G;
+
+				writelfl(lp_phy_val, lp_phy_addr);
 			}
 		}
 		writelfl(val, addr);
@@ -4308,10 +4320,10 @@
 {
 	int rc;
 
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 			if (rc) {
 				dev_err(&pdev->dev,
 					"64-bit DMA enable failed\n");
@@ -4319,12 +4331,12 @@
 			}
 		}
 	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 			return rc;
 		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev,
 				"32-bit consistent DMA enable failed\n");
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 1db6f5c..7ece85f 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -756,10 +756,10 @@
 			blk_queue_bounce_limit(sdev1->request_queue,
 					       ATA_DMA_MASK);
 
-		pci_set_dma_mask(pdev, ATA_DMA_MASK);
+		dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	} else {
 		/** This shouldn't fail as it was set to this value before */
-		pci_set_dma_mask(pdev, pp->adma_dma_mask);
+		dma_set_mask(&pdev->dev, pp->adma_dma_mask);
 		if (sdev0)
 			blk_queue_bounce_limit(sdev0->request_queue,
 					       pp->adma_dma_mask);
@@ -1133,10 +1133,10 @@
 
 	/* Ensure DMA mask is set to 32-bit before allocating legacy PRD and
 	   pad buffers */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		return rc;
 
@@ -1161,8 +1161,8 @@
 	   These are allowed to fail since we store the value that ends up
 	   being used to set as the bounce limit in slave_config later if
 	   needed. */
-	pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 	pp->adma_dma_mask = *dev->dma_mask;
 
 	mem = dmam_alloc_coherent(dev, NV_ADMA_PORT_PRIV_DMA_SZ,
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 3638887..0fa211e 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -1246,10 +1246,10 @@
 	/* initialize adapter */
 	pdc_host_init(host);
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index 9a6bd4c..af987a4 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -557,10 +557,10 @@
 	int rc, have_64bit_bus = (bus_info & QS_HPHY_64BIT);
 
 	if (have_64bit_bus &&
-	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	    !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 			if (rc) {
 				dev_err(&pdev->dev,
 					"64-bit DMA enable failed\n");
@@ -568,12 +568,12 @@
 			}
 		}
 	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 			return rc;
 		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev,
 				"32-bit consistent DMA enable failed\n");
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 40b76b2..dea6edcb 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -770,10 +770,10 @@
 		return rc;
 	host->iomap = pcim_iomap_table(pdev);
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index ba2667f..4b1995e 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -246,7 +246,7 @@
 	/* host flags */
 	SIL24_COMMON_FLAGS	= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
 				  ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA |
-				  ATA_FLAG_AN | ATA_FLAG_PMP | ATA_FLAG_LOWTAG,
+				  ATA_FLAG_AN | ATA_FLAG_PMP,
 	SIL24_FLAG_PCIX_IRQ_WOC	= (1 << 24), /* IRQ loss errata on PCI-X */
 
 	IRQ_STAT_4PORTS		= 0xf,
@@ -1312,10 +1312,10 @@
 	host->iomap = iomap;
 
 	/* configure and activate the device */
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
 		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 			if (rc) {
 				dev_err(&pdev->dev,
 					"64-bit DMA enable failed\n");
@@ -1323,12 +1323,12 @@
 			}
 		}
 	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev, "32-bit DMA enable failed\n");
 			return rc;
 		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc) {
 			dev_err(&pdev->dev,
 				"32-bit consistent DMA enable failed\n");
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index c630fa8..ff8307b 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -496,10 +496,10 @@
 		ata_port_pbar_desc(ap, 5, offset, "port");
 	}
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 39b5de6..3a18a8a 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -1476,10 +1476,10 @@
 	}
 
 	/* configure and activate */
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 47bf894..17d31fc 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -502,10 +502,10 @@
 	for (i = 0; i < host->n_ports; i++)
 		vt6421_init_addrs(host->ports[i]);
 
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index 29e847a..183eb52 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -387,10 +387,10 @@
 	/*
 	 * Use 32 bit DMA mask, because 64 bit address support is poor.
 	 */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		return rc;
 
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 0a53365..609e4c8 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -1,3 +1,6 @@
+# For include/trace/define_trace.h to include trace.h
+CFLAGS_regmap.o := -I$(src)
+
 obj-$(CONFIG_REGMAP) += regmap.o regcache.o
 obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 87db989..7eb7b3b 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -15,8 +15,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
-#include <trace/events/regmap.h>
 
+#include "trace.h"
 #include "internal.h"
 
 static const struct regcache_ops *cache_types[] = {
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index dbfe6a6..6273ff0 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -20,7 +20,7 @@
 #include <linux/sched.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/regmap.h>
+#include "trace.h"
 
 #include "internal.h"
 
diff --git a/include/trace/events/regmap.h b/drivers/base/regmap/trace.h
similarity index 96%
rename from include/trace/events/regmap.h
rename to drivers/base/regmap/trace.h
index 22317d2..64586a1 100644
--- a/include/trace/events/regmap.h
+++ b/drivers/base/regmap/trace.h
@@ -7,7 +7,7 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-#include "../../../drivers/base/regmap/internal.h"
+#include "internal.h"
 
 /*
  * Log register events
@@ -247,5 +247,11 @@
 
 #endif /* _TRACE_REGMAP_H */
 
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
 /* This part must be outside protection */
 #include <trace/define_trace.h>
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 2b94403..f749df9 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -405,8 +405,8 @@
 		goto Enomem4;
 	}
 	hba[i]->access.set_intr_mask(hba[i], 0);
-	if (request_irq(hba[i]->intr, do_ida_intr,
-		IRQF_DISABLED|IRQF_SHARED, hba[i]->devname, hba[i]))
+	if (request_irq(hba[i]->intr, do_ida_intr, IRQF_SHARED,
+			hba[i]->devname, hba[i]))
 	{
 		printk(KERN_ERR "cpqarray: Unable to get irq %d for %s\n",
 				hba[i]->intr, hba[i]->devname);
diff --git a/drivers/bus/omap_l3_noc.c b/drivers/bus/omap_l3_noc.c
index 029bc73..11f7982 100644
--- a/drivers/bus/omap_l3_noc.c
+++ b/drivers/bus/omap_l3_noc.c
@@ -284,7 +284,7 @@
 	 */
 	l3->debug_irq = platform_get_irq(pdev, 0);
 	ret = devm_request_irq(l3->dev, l3->debug_irq, l3_interrupt_handler,
-			       IRQF_DISABLED, "l3-dbg-irq", l3);
+			       0x0, "l3-dbg-irq", l3);
 	if (ret) {
 		dev_err(l3->dev, "request_irq failed for %d\n",
 			l3->debug_irq);
@@ -293,7 +293,7 @@
 
 	l3->app_irq = platform_get_irq(pdev, 1);
 	ret = devm_request_irq(l3->dev, l3->app_irq, l3_interrupt_handler,
-			       IRQF_DISABLED, "l3-app-irq", l3);
+			       0x0, "l3-app-irq", l3);
 	if (ret)
 		dev_err(l3->dev, "request_irq failed for %d\n", l3->app_irq);
 
diff --git a/drivers/bus/omap_l3_smx.c b/drivers/bus/omap_l3_smx.c
index 597fdae..360a5c0 100644
--- a/drivers/bus/omap_l3_smx.c
+++ b/drivers/bus/omap_l3_smx.c
@@ -251,18 +251,16 @@
 	}
 
 	l3->debug_irq = platform_get_irq(pdev, 0);
-	ret = request_irq(l3->debug_irq, omap3_l3_app_irq,
-		IRQF_DISABLED | IRQF_TRIGGER_RISING,
-		"l3-debug-irq", l3);
+	ret = request_irq(l3->debug_irq, omap3_l3_app_irq, IRQF_TRIGGER_RISING,
+			  "l3-debug-irq", l3);
 	if (ret) {
 		dev_err(&pdev->dev, "couldn't request debug irq\n");
 		goto err1;
 	}
 
 	l3->app_irq = platform_get_irq(pdev, 1);
-	ret = request_irq(l3->app_irq, omap3_l3_app_irq,
-		IRQF_DISABLED | IRQF_TRIGGER_RISING,
-		"l3-app-irq", l3);
+	ret = request_irq(l3->app_irq, omap3_l3_app_irq, IRQF_TRIGGER_RISING,
+			  "l3-app-irq", l3);
 	if (ret) {
 		dev_err(&pdev->dev, "couldn't request app irq\n");
 		goto err2;
diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c
index 79524ed..8753b0f 100644
--- a/drivers/char/ipmi/ipmi_powernv.c
+++ b/drivers/char/ipmi/ipmi_powernv.c
@@ -125,6 +125,7 @@
 	spin_lock_irqsave(&smi->msg_lock, flags);
 
 	if (!smi->cur_msg) {
+		spin_unlock_irqrestore(&smi->msg_lock, flags);
 		pr_warn("no current message?\n");
 		return 0;
 	}
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index f6646ed..518585c 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -263,6 +263,11 @@
 	bool supports_event_msg_buff;
 
 	/*
+	 * Can we clear the global enables receive irq bit?
+	 */
+	bool cannot_clear_recv_irq_bit;
+
+	/*
 	 * Did we get an attention that we did not handle?
 	 */
 	bool got_attn;
@@ -461,6 +466,9 @@
  * allocate messages, we just leave them in the BMC and run the system
  * polled until we can allocate some memory.  Once we have some
  * memory, we will re-enable the interrupt.
+ *
+ * Note that we cannot just use disable_irq(), since the interrupt may
+ * be shared.
  */
 static inline bool disable_si_irq(struct smi_info *smi_info)
 {
@@ -549,20 +557,15 @@
 
 	if (smi_info->supports_event_msg_buff)
 		enables |= IPMI_BMC_EVT_MSG_BUFF;
-	else
-		enables &= ~IPMI_BMC_EVT_MSG_BUFF;
 
-	if (smi_info->irq && !smi_info->interrupt_disabled)
+	if ((smi_info->irq && !smi_info->interrupt_disabled) ||
+	    smi_info->cannot_clear_recv_irq_bit)
 		enables |= IPMI_BMC_RCV_MSG_INTR;
-	else
-		enables &= ~IPMI_BMC_RCV_MSG_INTR;
 
 	if (smi_info->supports_event_msg_buff &&
 	    smi_info->irq && !smi_info->interrupt_disabled)
 
 		enables |= IPMI_BMC_EVT_MSG_INTR;
-	else
-		enables &= ~IPMI_BMC_EVT_MSG_INTR;
 
 	*irq_on = enables & (IPMI_BMC_EVT_MSG_INTR | IPMI_BMC_RCV_MSG_INTR);
 
@@ -2900,6 +2903,96 @@
 	return rv;
 }
 
+/*
+ * Some BMCs do not support clearing the receive irq bit in the global
+ * enables (even if they don't support interrupts on the BMC).  Check
+ * for this and handle it properly.
+ */
+static void check_clr_rcv_irq(struct smi_info *smi_info)
+{
+	unsigned char         msg[3];
+	unsigned char         *resp;
+	unsigned long         resp_len;
+	int                   rv;
+
+	resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
+	if (!resp) {
+		printk(KERN_WARNING PFX "Out of memory allocating response for"
+		       " global enables command, cannot check recv irq bit"
+		       " handling.\n");
+		return;
+	}
+
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
+	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+
+	rv = wait_for_msg_done(smi_info);
+	if (rv) {
+		printk(KERN_WARNING PFX "Error getting response from get"
+		       " global enables command, cannot check recv irq bit"
+		       " handling.\n");
+		goto out;
+	}
+
+	resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+						  resp, IPMI_MAX_MSG_LENGTH);
+
+	if (resp_len < 4 ||
+			resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+			resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD   ||
+			resp[2] != 0) {
+		printk(KERN_WARNING PFX "Invalid return from get global"
+		       " enables command, cannot check recv irq bit"
+		       " handling.\n");
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if ((resp[3] & IPMI_BMC_RCV_MSG_INTR) == 0)
+		/* Already clear, should work ok. */
+		goto out;
+
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
+	msg[2] = resp[3] & ~IPMI_BMC_RCV_MSG_INTR;
+	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+
+	rv = wait_for_msg_done(smi_info);
+	if (rv) {
+		printk(KERN_WARNING PFX "Error getting response from set"
+		       " global enables command, cannot check recv irq bit"
+		       " handling.\n");
+		goto out;
+	}
+
+	resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+						  resp, IPMI_MAX_MSG_LENGTH);
+
+	if (resp_len < 3 ||
+			resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+			resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) {
+		printk(KERN_WARNING PFX "Invalid return from get global"
+		       " enables command, cannot check recv irq bit"
+		       " handling.\n");
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (resp[2] != 0) {
+		/*
+		 * An error when setting the event buffer bit means
+		 * clearing the bit is not supported.
+		 */
+		printk(KERN_WARNING PFX "The BMC does not support clearing"
+		       " the recv irq bit, compensating, but the BMC needs to"
+		       " be fixed.\n");
+		smi_info->cannot_clear_recv_irq_bit = true;
+	}
+ out:
+	kfree(resp);
+}
+
 static int try_enable_event_buffer(struct smi_info *smi_info)
 {
 	unsigned char         msg[3];
@@ -3395,6 +3488,8 @@
 		goto out_err;
 	}
 
+	check_clr_rcv_irq(new_smi);
+
 	setup_oem_data_handler(new_smi);
 	setup_xaction_handlers(new_smi);
 
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index f6e378d..f40e3bd 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -468,11 +468,13 @@
 		int result;
 
 		/* Wait for something to do */
-		wait_for_completion(&ssif_info->wake_thread);
-		init_completion(&ssif_info->wake_thread);
-
+		result = wait_for_completion_interruptible(
+						&ssif_info->wake_thread);
 		if (ssif_info->stopping)
 			break;
+		if (result == -ERESTARTSYS)
+			continue;
+		init_completion(&ssif_info->wake_thread);
 
 		if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) {
 			result = i2c_smbus_write_block_data(
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index a3025e7..2664696 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -661,17 +661,17 @@
 };
 
 static bool __init
-arch_timer_probed(int type, const struct of_device_id *matches)
+arch_timer_needs_probing(int type, const struct of_device_id *matches)
 {
 	struct device_node *dn;
-	bool probed = true;
+	bool needs_probing = false;
 
 	dn = of_find_matching_node(NULL, matches);
 	if (dn && of_device_is_available(dn) && !(arch_timers_present & type))
-		probed = false;
+		needs_probing = true;
 	of_node_put(dn);
 
-	return probed;
+	return needs_probing;
 }
 
 static void __init arch_timer_common_init(void)
@@ -680,9 +680,9 @@
 
 	/* Wait until both nodes are probed if we have two timers */
 	if ((arch_timers_present & mask) != mask) {
-		if (!arch_timer_probed(ARCH_MEM_TIMER, arch_timer_mem_of_match))
+		if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match))
 			return;
-		if (!arch_timer_probed(ARCH_CP15_TIMER, arch_timer_of_match))
+		if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match))
 			return;
 	}
 
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index d305fb0..a19a3f6 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -108,7 +108,7 @@
 
 static u64 notrace read_sched_clock(void)
 {
-	return ~__raw_readl(sched_io_base);
+	return ~readl_relaxed(sched_io_base);
 }
 
 static const struct of_device_id sptimer_ids[] __initconst = {
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index d0a7bd6..dc3c6ee 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -210,7 +210,7 @@
 
 	ret = em_sti_start(p, USER_CLOCKSOURCE);
 	if (!ret)
-		__clocksource_updatefreq_hz(cs, p->rate);
+		__clocksource_update_freq_hz(cs, p->rate);
 	return ret;
 }
 
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 2bd13b5..b8ff3c6 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -641,7 +641,7 @@
 
 	ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
 	if (!ret) {
-		__clocksource_updatefreq_hz(cs, ch->rate);
+		__clocksource_update_freq_hz(cs, ch->rate);
 		ch->cs_enabled = true;
 	}
 	return ret;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index f150ca82..b6b8fa3 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -272,7 +272,7 @@
 
 	ret = sh_tmu_enable(ch);
 	if (!ret) {
-		__clocksource_updatefreq_hz(cs, ch->rate);
+		__clocksource_update_freq_hz(cs, ch->rate);
 		ch->cs_enabled = true;
 	}
 
diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index f4a9c00..1928a89 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -170,7 +170,15 @@
 	       TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M),
 	       timer_base + TIMER_CTL_REG(1));
 
-	sched_clock_register(sun4i_timer_sched_read, 32, rate);
+	/*
+	 * sched_clock_register does not have priorities, and on sun6i and
+	 * later there is a better sched_clock registered by arm_arch_timer.c
+	 */
+	if (of_machine_is_compatible("allwinner,sun4i-a10") ||
+	    of_machine_is_compatible("allwinner,sun5i-a13") ||
+	    of_machine_is_compatible("allwinner,sun5i-a10s"))
+		sched_clock_register(sun4i_timer_sched_read, 32, rate);
+
 	clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name,
 			      rate, 350, 32, clocksource_mmio_readl_down);
 
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index d2616ef..5a112d7 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -51,15 +51,15 @@
 static void __iomem *timer_reg_base;
 static void __iomem *rtc_base;
 
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static u64 persistent_ms, last_persistent_ms;
 
 static struct delay_timer tegra_delay_timer;
 
 #define timer_writel(value, reg) \
-	__raw_writel(value, timer_reg_base + (reg))
+	writel_relaxed(value, timer_reg_base + (reg))
 #define timer_readl(reg) \
-	__raw_readl(timer_reg_base + (reg))
+	readl_relaxed(timer_reg_base + (reg))
 
 static int tegra_timer_set_next_event(unsigned long cycles,
 					 struct clock_event_device *evt)
@@ -120,26 +120,25 @@
 }
 
 /*
- * tegra_read_persistent_clock -  Return time from a persistent clock.
+ * tegra_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
  * 32k sync timer.  Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
  * Care must be taken that this funciton is not called while the
  * tegra_rtc driver could be executing to avoid race conditions
  * on the RTC shadow register
  */
-static void tegra_read_persistent_clock(struct timespec *ts)
+static void tegra_read_persistent_clock64(struct timespec64 *ts)
 {
 	u64 delta;
-	struct timespec *tsp = &persistent_ts;
 
 	last_persistent_ms = persistent_ms;
 	persistent_ms = tegra_rtc_read_ms();
 	delta = persistent_ms - last_persistent_ms;
 
-	timespec_add_ns(tsp, delta * NSEC_PER_MSEC);
-	*ts = *tsp;
+	timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC);
+	*ts = persistent_ts;
 }
 
 static unsigned long tegra_delay_timer_read_counter_long(void)
@@ -252,7 +251,7 @@
 	else
 		clk_prepare_enable(clk);
 
-	register_persistent_clock(NULL, tegra_read_persistent_clock);
+	register_persistent_clock(NULL, tegra_read_persistent_clock64);
 }
 CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
 
diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c
index ec57ba2..5b6e3d5 100644
--- a/drivers/clocksource/time-efm32.c
+++ b/drivers/clocksource/time-efm32.c
@@ -111,7 +111,7 @@
 static struct efm32_clock_event_ddata clock_event_ddata = {
 	.evtdev = {
 		.name = "efm32 clockevent",
-		.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_MODE_PERIODIC,
+		.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 		.set_mode = efm32_clock_event_set_mode,
 		.set_next_event = efm32_clock_event_set_next_event,
 		.rating = 200,
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index b5b4d45..c0304ff 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -61,12 +61,12 @@
 
 static inline unsigned int pit_read(void __iomem *base, unsigned int reg_offset)
 {
-	return __raw_readl(base + reg_offset);
+	return readl_relaxed(base + reg_offset);
 }
 
 static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsigned long value)
 {
-	__raw_writel(value, base + reg_offset);
+	writel_relaxed(value, base + reg_offset);
 }
 
 /*
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 58597fb..28aa4b7 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -17,6 +17,7 @@
 #include <linux/irq.h>
 #include <linux/irqreturn.h>
 #include <linux/reset.h>
+#include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
@@ -36,8 +37,31 @@
 
 #define TIMER_SYNC_TICKS	3
 
-static void __iomem *timer_base;
-static u32 ticks_per_jiffy;
+struct sun5i_timer {
+	void __iomem		*base;
+	struct clk		*clk;
+	struct notifier_block	clk_rate_cb;
+	u32			ticks_per_jiffy;
+};
+
+#define to_sun5i_timer(x) \
+	container_of(x, struct sun5i_timer, clk_rate_cb)
+
+struct sun5i_timer_clksrc {
+	struct sun5i_timer	timer;
+	struct clocksource	clksrc;
+};
+
+#define to_sun5i_timer_clksrc(x) \
+	container_of(x, struct sun5i_timer_clksrc, clksrc)
+
+struct sun5i_timer_clkevt {
+	struct sun5i_timer		timer;
+	struct clock_event_device	clkevt;
+};
+
+#define to_sun5i_timer_clkevt(x) \
+	container_of(x, struct sun5i_timer_clkevt, clkevt)
 
 /*
  * When we disable a timer, we need to wait at least for 2 cycles of
@@ -45,30 +69,30 @@
  * that is already setup and runs at the same frequency than the other
  * timers, and we never will be disabled.
  */
-static void sun5i_clkevt_sync(void)
+static void sun5i_clkevt_sync(struct sun5i_timer_clkevt *ce)
 {
-	u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+	u32 old = readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1));
 
-	while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
+	while ((old - readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
 		cpu_relax();
 }
 
-static void sun5i_clkevt_time_stop(u8 timer)
+static void sun5i_clkevt_time_stop(struct sun5i_timer_clkevt *ce, u8 timer)
 {
-	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
-	writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer));
+	u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
+	writel(val & ~TIMER_CTL_ENABLE, ce->timer.base + TIMER_CTL_REG(timer));
 
-	sun5i_clkevt_sync();
+	sun5i_clkevt_sync(ce);
 }
 
-static void sun5i_clkevt_time_setup(u8 timer, u32 delay)
+static void sun5i_clkevt_time_setup(struct sun5i_timer_clkevt *ce, u8 timer, u32 delay)
 {
-	writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer));
+	writel(delay, ce->timer.base + TIMER_INTVAL_LO_REG(timer));
 }
 
-static void sun5i_clkevt_time_start(u8 timer, bool periodic)
+static void sun5i_clkevt_time_start(struct sun5i_timer_clkevt *ce, u8 timer, bool periodic)
 {
-	u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+	u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
 
 	if (periodic)
 		val &= ~TIMER_CTL_ONESHOT;
@@ -76,75 +100,230 @@
 		val |= TIMER_CTL_ONESHOT;
 
 	writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-	       timer_base + TIMER_CTL_REG(timer));
+	       ce->timer.base + TIMER_CTL_REG(timer));
 }
 
 static void sun5i_clkevt_mode(enum clock_event_mode mode,
-			      struct clock_event_device *clk)
+			      struct clock_event_device *clkevt)
 {
+	struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
-		sun5i_clkevt_time_stop(0);
-		sun5i_clkevt_time_setup(0, ticks_per_jiffy);
-		sun5i_clkevt_time_start(0, true);
+		sun5i_clkevt_time_stop(ce, 0);
+		sun5i_clkevt_time_setup(ce, 0, ce->timer.ticks_per_jiffy);
+		sun5i_clkevt_time_start(ce, 0, true);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
-		sun5i_clkevt_time_stop(0);
-		sun5i_clkevt_time_start(0, false);
+		sun5i_clkevt_time_stop(ce, 0);
+		sun5i_clkevt_time_start(ce, 0, false);
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
 	default:
-		sun5i_clkevt_time_stop(0);
+		sun5i_clkevt_time_stop(ce, 0);
 		break;
 	}
 }
 
 static int sun5i_clkevt_next_event(unsigned long evt,
-				   struct clock_event_device *unused)
+				   struct clock_event_device *clkevt)
 {
-	sun5i_clkevt_time_stop(0);
-	sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS);
-	sun5i_clkevt_time_start(0, false);
+	struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
+	sun5i_clkevt_time_stop(ce, 0);
+	sun5i_clkevt_time_setup(ce, 0, evt - TIMER_SYNC_TICKS);
+	sun5i_clkevt_time_start(ce, 0, false);
 
 	return 0;
 }
 
-static struct clock_event_device sun5i_clockevent = {
-	.name = "sun5i_tick",
-	.rating = 340,
-	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.set_mode = sun5i_clkevt_mode,
-	.set_next_event = sun5i_clkevt_next_event,
-};
-
-
 static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
 {
-	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+	struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id;
 
-	writel(0x1, timer_base + TIMER_IRQ_ST_REG);
-	evt->event_handler(evt);
+	writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG);
+	ce->clkevt.event_handler(&ce->clkevt);
 
 	return IRQ_HANDLED;
 }
 
-static struct irqaction sun5i_timer_irq = {
-	.name = "sun5i_timer0",
-	.flags = IRQF_TIMER | IRQF_IRQPOLL,
-	.handler = sun5i_timer_interrupt,
-	.dev_id = &sun5i_clockevent,
-};
+static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
+{
+	struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
+
+	return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
+}
+
+static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
+				unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct sun5i_timer *timer = to_sun5i_timer(nb);
+	struct sun5i_timer_clksrc *cs = container_of(timer, struct sun5i_timer_clksrc, timer);
+
+	switch (event) {
+	case PRE_RATE_CHANGE:
+		clocksource_unregister(&cs->clksrc);
+		break;
+
+	case POST_RATE_CHANGE:
+		clocksource_register_hz(&cs->clksrc, ndata->new_rate);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int __init sun5i_setup_clocksource(struct device_node *node,
+					  void __iomem *base,
+					  struct clk *clk, int irq)
+{
+	struct sun5i_timer_clksrc *cs;
+	unsigned long rate;
+	int ret;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return -ENOMEM;
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		pr_err("Couldn't enable parent clock\n");
+		goto err_free;
+	}
+
+	rate = clk_get_rate(clk);
+
+	cs->timer.base = base;
+	cs->timer.clk = clk;
+	cs->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clksrc;
+	cs->timer.clk_rate_cb.next = NULL;
+
+	ret = clk_notifier_register(clk, &cs->timer.clk_rate_cb);
+	if (ret) {
+		pr_err("Unable to register clock notifier.\n");
+		goto err_disable_clk;
+	}
+
+	writel(~0, base + TIMER_INTVAL_LO_REG(1));
+	writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+	       base + TIMER_CTL_REG(1));
+
+	cs->clksrc.name = node->name;
+	cs->clksrc.rating = 340;
+	cs->clksrc.read = sun5i_clksrc_read;
+	cs->clksrc.mask = CLOCKSOURCE_MASK(32);
+	cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+	ret = clocksource_register_hz(&cs->clksrc, rate);
+	if (ret) {
+		pr_err("Couldn't register clock source.\n");
+		goto err_remove_notifier;
+	}
+
+	return 0;
+
+err_remove_notifier:
+	clk_notifier_unregister(clk, &cs->timer.clk_rate_cb);
+err_disable_clk:
+	clk_disable_unprepare(clk);
+err_free:
+	kfree(cs);
+	return ret;
+}
+
+static int sun5i_rate_cb_clkevt(struct notifier_block *nb,
+				unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct sun5i_timer *timer = to_sun5i_timer(nb);
+	struct sun5i_timer_clkevt *ce = container_of(timer, struct sun5i_timer_clkevt, timer);
+
+	if (event == POST_RATE_CHANGE) {
+		clockevents_update_freq(&ce->clkevt, ndata->new_rate);
+		ce->timer.ticks_per_jiffy = DIV_ROUND_UP(ndata->new_rate, HZ);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base,
+					 struct clk *clk, int irq)
+{
+	struct sun5i_timer_clkevt *ce;
+	unsigned long rate;
+	int ret;
+	u32 val;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return -ENOMEM;
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		pr_err("Couldn't enable parent clock\n");
+		goto err_free;
+	}
+
+	rate = clk_get_rate(clk);
+
+	ce->timer.base = base;
+	ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
+	ce->timer.clk = clk;
+	ce->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clkevt;
+	ce->timer.clk_rate_cb.next = NULL;
+
+	ret = clk_notifier_register(clk, &ce->timer.clk_rate_cb);
+	if (ret) {
+		pr_err("Unable to register clock notifier.\n");
+		goto err_disable_clk;
+	}
+
+	ce->clkevt.name = node->name;
+	ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	ce->clkevt.set_next_event = sun5i_clkevt_next_event;
+	ce->clkevt.set_mode = sun5i_clkevt_mode;
+	ce->clkevt.rating = 340;
+	ce->clkevt.irq = irq;
+	ce->clkevt.cpumask = cpu_possible_mask;
+
+	/* Enable timer0 interrupt */
+	val = readl(base + TIMER_IRQ_EN_REG);
+	writel(val | TIMER_IRQ_EN(0), base + TIMER_IRQ_EN_REG);
+
+	clockevents_config_and_register(&ce->clkevt, rate,
+					TIMER_SYNC_TICKS, 0xffffffff);
+
+	ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
+			  "sun5i_timer0", ce);
+	if (ret) {
+		pr_err("Unable to register interrupt\n");
+		goto err_remove_notifier;
+	}
+
+	return 0;
+
+err_remove_notifier:
+	clk_notifier_unregister(clk, &ce->timer.clk_rate_cb);
+err_disable_clk:
+	clk_disable_unprepare(clk);
+err_free:
+	kfree(ce);
+	return ret;
+}
 
 static void __init sun5i_timer_init(struct device_node *node)
 {
 	struct reset_control *rstc;
-	unsigned long rate;
+	void __iomem *timer_base;
 	struct clk *clk;
-	int ret, irq;
-	u32 val;
+	int irq;
 
-	timer_base = of_iomap(node, 0);
+	timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
 	if (!timer_base)
 		panic("Can't map registers");
 
@@ -155,35 +334,13 @@
 	clk = of_clk_get(node, 0);
 	if (IS_ERR(clk))
 		panic("Can't get timer clock");
-	clk_prepare_enable(clk);
-	rate = clk_get_rate(clk);
 
 	rstc = of_reset_control_get(node, NULL);
 	if (!IS_ERR(rstc))
 		reset_control_deassert(rstc);
 
-	writel(~0, timer_base + TIMER_INTVAL_LO_REG(1));
-	writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-	       timer_base + TIMER_CTL_REG(1));
-
-	clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
-			      rate, 340, 32, clocksource_mmio_readl_down);
-
-	ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
-
-	/* Enable timer0 interrupt */
-	val = readl(timer_base + TIMER_IRQ_EN_REG);
-	writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
-
-	sun5i_clockevent.cpumask = cpu_possible_mask;
-	sun5i_clockevent.irq = irq;
-
-	clockevents_config_and_register(&sun5i_clockevent, rate,
-					TIMER_SYNC_TICKS, 0xffffffff);
-
-	ret = setup_irq(irq, &sun5i_timer_irq);
-	if (ret)
-		pr_warn("failed to setup irq %d\n", irq);
+	sun5i_setup_clocksource(node, timer_base, clk, irq);
+	sun5i_setup_clockevent(node, timer_base, clk, irq);
 }
 CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
 		       sun5i_timer_init);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 28e59a4..8ae655c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1698,15 +1698,18 @@
 		    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
 			pr_err("%s: Failed to start governor for policy: %p\n",
 				__func__, policy);
-
-		/*
-		 * schedule call cpufreq_update_policy() for boot CPU, i.e. last
-		 * policy in list. It will verify that the current freq is in
-		 * sync with what we believe it to be.
-		 */
-		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
-			schedule_work(&policy->update);
 	}
+
+	/*
+	 * schedule call cpufreq_update_policy() for first-online CPU, as that
+	 * wouldn't be hotplugged-out on suspend. It will verify that the
+	 * current freq is in sync with what we believe it to be.
+	 */
+	policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
+	if (WARN_ON(!policy))
+		return;
+
+	schedule_work(&policy->update);
 }
 
 /**
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 080bd2d..7a73a27 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -330,9 +330,6 @@
 	if (!dev->registered)
 		return -EINVAL;
 
-	if (!dev->state_count)
-		dev->state_count = drv->state_count;
-
 	ret = cpuidle_add_device_sysfs(dev);
 	if (ret)
 		return ret;
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 2697e87..5db1478 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -13,7 +13,7 @@
 #include <linux/sched.h>
 #include <linux/cpuidle.h>
 #include <linux/cpumask.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include "cpuidle.h"
 
@@ -130,21 +130,20 @@
 #endif
 
 /**
- * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer
+ * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu
  * @arg: a void pointer used to match the SMP cross call API
  *
- * @arg is used as a value of type 'long' with one of the two values:
- * - CLOCK_EVT_NOTIFY_BROADCAST_ON
- * - CLOCK_EVT_NOTIFY_BROADCAST_OFF
+ * If @arg is NULL broadcast is disabled otherwise enabled
  *
- * Set the broadcast timer notification for the current CPU.  This function
- * is executed per CPU by an SMP cross call.  It not supposed to be called
- * directly.
+ * This function is executed per CPU by an SMP cross call.  It's not
+ * supposed to be called directly.
  */
 static void cpuidle_setup_broadcast_timer(void *arg)
 {
-	int cpu = smp_processor_id();
-	clockevents_notify((long)(arg), &cpu);
+	if (arg)
+		tick_broadcast_enable();
+	else
+		tick_broadcast_disable();
 }
 
 /**
@@ -239,7 +238,7 @@
 
 	if (drv->bctimer)
 		on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-				 (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
+				 (void *)1, 1);
 
 	poll_idle_init(drv);
 
@@ -263,7 +262,7 @@
 	if (drv->bctimer) {
 		drv->bctimer = 0;
 		on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-				 (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
+				 NULL, 1);
 	}
 
 	__cpuidle_unset_driver(drv);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 97c5903..832a2c3 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -401,7 +401,7 @@
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
 
 	/* state statistics */
-	for (i = 0; i < device->state_count; i++) {
+	for (i = 0; i < drv->state_count; i++) {
 		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
 		if (!kobj)
 			goto error_state;
@@ -433,9 +433,10 @@
  */
 static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 {
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
 	int i;
 
-	for (i = 0; i < device->state_count; i++)
+	for (i = 0; i < drv->state_count; i++)
 		cpuidle_free_state_kobj(device, i);
 }
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a874b6e..942ca54 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -51,19 +51,6 @@
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-config INTEL_MID_DMAC
-	tristate "Intel MID DMA support for Peripheral DMA controllers"
-	depends on PCI && X86
-	select DMA_ENGINE
-	default n
-	help
-	  Enable support for the Intel(R) MID DMA engine present
-	  in Intel MID chipsets.
-
-	  Say Y here if you have such a chipset.
-
-	  If unsure, say N.
-
 config ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f915f61..539d482 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -6,7 +6,6 @@
 obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
 obj-$(CONFIG_DMA_OF) += of-dma.o
 
-obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 512cb8e..ceedafb 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -903,6 +903,11 @@
 	return of_id->data;
 }
 
+#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
 static int cppi41_dma_probe(struct platform_device *pdev)
 {
 	struct cppi41_dd *cdd;
@@ -926,6 +931,10 @@
 	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
 	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
 	cdd->ddev.device_terminate_all = cppi41_stop_chan;
+	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
+	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
+	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	cdd->ddev.dev = dev;
 	INIT_LIST_HEAD(&cdd->ddev.channels);
 	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index f15712f..ac336a9 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -859,9 +859,6 @@
 	BUG_ON(!device->device_issue_pending);
 	BUG_ON(!device->dev);
 
-	WARN(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->directions,
-	     "this driver doesn't support generic slave capabilities reporting\n");
-
 	/* note: this only matters in the
 	 * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
 	 */
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
deleted file mode 100644
index 5aaead9..0000000
--- a/drivers/dma/intel_mid_dma.c
+++ /dev/null
@@ -1,1447 +0,0 @@
-/*
- *  intel_mid_dma.c - Intel Langwell DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  The driver design is based on dw_dmac driver
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
-#include <linux/intel_mid_dma.h>
-#include <linux/module.h>
-
-#include "dmaengine.h"
-
-#define MAX_CHAN	4 /*max ch across controllers*/
-#include "intel_mid_dma_regs.h"
-
-#define INTEL_MID_DMAC1_ID		0x0814
-#define INTEL_MID_DMAC2_ID		0x0813
-#define INTEL_MID_GP_DMAC2_ID		0x0827
-#define INTEL_MFLD_DMAC1_ID		0x0830
-#define LNW_PERIPHRAL_MASK_BASE		0xFFAE8008
-#define LNW_PERIPHRAL_MASK_SIZE		0x10
-#define LNW_PERIPHRAL_STATUS		0x0
-#define LNW_PERIPHRAL_MASK		0x8
-
-struct intel_mid_dma_probe_info {
-	u8 max_chan;
-	u8 ch_base;
-	u16 block_size;
-	u32 pimr_mask;
-};
-
-#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \
-	((kernel_ulong_t)&(struct intel_mid_dma_probe_info) {	\
-		.max_chan = (_max_chan),			\
-		.ch_base = (_ch_base),				\
-		.block_size = (_block_size),			\
-		.pimr_mask = (_pimr_mask),			\
-	})
-
-/*****************************************************************************
-Utility Functions*/
-/**
- * get_ch_index	-	convert status to channel
- * @status: status mask
- * @base: dma ch base value
- *
- * Modify the status mask and return the channel index needing
- * attention (or -1 if neither)
- */
-static int get_ch_index(int *status, unsigned int base)
-{
-	int i;
-	for (i = 0; i < MAX_CHAN; i++) {
-		if (*status & (1 << (i + base))) {
-			*status = *status & ~(1 << (i + base));
-			pr_debug("MDMA: index %d New status %x\n", i, *status);
-			return i;
-		}
-	}
-	return -1;
-}
-
-/**
- * get_block_ts	-	calculates dma transaction length
- * @len: dma transfer length
- * @tx_width: dma transfer src width
- * @block_size: dma controller max block size
- *
- * Based on src width calculate the DMA trsaction length in data items
- * return data items or FFFF if exceeds max length for block
- */
-static int get_block_ts(int len, int tx_width, int block_size)
-{
-	int byte_width = 0, block_ts = 0;
-
-	switch (tx_width) {
-	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		byte_width = 1;
-		break;
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		byte_width = 2;
-		break;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-	default:
-		byte_width = 4;
-		break;
-	}
-
-	block_ts = len/byte_width;
-	if (block_ts > block_size)
-		block_ts = 0xFFFF;
-	return block_ts;
-}
-
-/*****************************************************************************
-DMAC1 interrupt Functions*/
-
-/**
- * dmac1_mask_periphral_intr -	mask the periphral interrupt
- * @mid: dma device for which masking is required
- *
- * Masks the DMA periphral interrupt
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_mask_periphral_intr(struct middma_device *mid)
-{
-	u32 pimr;
-
-	if (mid->pimr_mask) {
-		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-		pimr |= mid->pimr_mask;
-		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-	}
-	return;
-}
-
-/**
- * dmac1_unmask_periphral_intr -	unmask the periphral interrupt
- * @midc: dma channel for which masking is required
- *
- * UnMasks the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc)
-{
-	u32 pimr;
-	struct middma_device *mid = to_middma_device(midc->chan.device);
-
-	if (mid->pimr_mask) {
-		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-		pimr &= ~mid->pimr_mask;
-		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-	}
-	return;
-}
-
-/**
- * enable_dma_interrupt -	enable the periphral interrupt
- * @midc: dma channel for which enable interrupt is required
- *
- * Enable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void enable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-	dmac1_unmask_periphral_intr(midc);
-
-	/*en ch interrupts*/
-	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-	return;
-}
-
-/**
- * disable_dma_interrupt -	disable the periphral interrupt
- * @midc: dma channel for which disable interrupt is required
- *
- * Disable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void disable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-	/*Check LPE PISR, make sure fwd is disabled*/
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK);
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-	return;
-}
-
-/*****************************************************************************
-DMA channel helper Functions*/
-/**
- * mid_desc_get		-	get a descriptor
- * @midc: dma channel for which descriptor is required
- *
- * Obtain a descriptor for the channel. Returns NULL if none are free.
- * Once the descriptor is returned it is private until put on another
- * list or freed
- */
-static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc)
-{
-	struct intel_mid_dma_desc *desc, *_desc;
-	struct intel_mid_dma_desc *ret = NULL;
-
-	spin_lock_bh(&midc->lock);
-	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del(&desc->desc_node);
-			ret = desc;
-			break;
-		}
-	}
-	spin_unlock_bh(&midc->lock);
-	return ret;
-}
-
-/**
- * mid_desc_put		-	put a descriptor
- * @midc: dma channel for which descriptor is required
- * @desc: descriptor to put
- *
- * Return a descriptor from lwn_desc_get back to the free pool
- */
-static void midc_desc_put(struct intel_mid_dma_chan *midc,
-			struct intel_mid_dma_desc *desc)
-{
-	if (desc) {
-		spin_lock_bh(&midc->lock);
-		list_add_tail(&desc->desc_node, &midc->free_list);
-		spin_unlock_bh(&midc->lock);
-	}
-}
-/**
- * midc_dostart		-		begin a DMA transaction
- * @midc: channel for which txn is to be started
- * @first: first descriptor of series
- *
- * Load a transaction into the engine. This must be called with midc->lock
- * held and bh disabled.
- */
-static void midc_dostart(struct intel_mid_dma_chan *midc,
-			struct intel_mid_dma_desc *first)
-{
-	struct middma_device *mid = to_middma_device(midc->chan.device);
-
-	/*  channel is idle */
-	if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) {
-		/*error*/
-		pr_err("ERR_MDMA: channel is busy in start\n");
-		/* The tasklet will hopefully advance the queue... */
-		return;
-	}
-	midc->busy = true;
-	/*write registers and en*/
-	iowrite32(first->sar, midc->ch_regs + SAR);
-	iowrite32(first->dar, midc->ch_regs + DAR);
-	iowrite32(first->lli_phys, midc->ch_regs + LLP);
-	iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH);
-	iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW);
-	iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW);
-	iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH);
-	pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n",
-		(int)first->sar, (int)first->dar, first->cfg_hi,
-		first->cfg_lo, first->ctl_hi, first->ctl_lo);
-	first->status = DMA_IN_PROGRESS;
-
-	iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-}
-
-/**
- * midc_descriptor_complete	-	process completed descriptor
- * @midc: channel owning the descriptor
- * @desc: the descriptor itself
- *
- * Process a completed descriptor and perform any callbacks upon
- * the completion. The completion handling drops the lock during the
- * callbacks but must be called with the lock held.
- */
-static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
-		struct intel_mid_dma_desc *desc)
-		__releases(&midc->lock) __acquires(&midc->lock)
-{
-	struct dma_async_tx_descriptor	*txd = &desc->txd;
-	dma_async_tx_callback callback_txd = NULL;
-	struct intel_mid_dma_lli	*llitem;
-	void *param_txd = NULL;
-
-	dma_cookie_complete(txd);
-	callback_txd = txd->callback;
-	param_txd = txd->callback_param;
-
-	if (desc->lli != NULL) {
-		/*clear the DONE bit of completed LLI in memory*/
-		llitem = desc->lli + desc->current_lli;
-		llitem->ctl_hi &= CLEAR_DONE;
-		if (desc->current_lli < desc->lli_length-1)
-			(desc->current_lli)++;
-		else
-			desc->current_lli = 0;
-	}
-	spin_unlock_bh(&midc->lock);
-	if (callback_txd) {
-		pr_debug("MDMA: TXD callback set ... calling\n");
-		callback_txd(param_txd);
-	}
-	if (midc->raw_tfr) {
-		desc->status = DMA_COMPLETE;
-		if (desc->lli != NULL) {
-			pci_pool_free(desc->lli_pool, desc->lli,
-						desc->lli_phys);
-			pci_pool_destroy(desc->lli_pool);
-			desc->lli = NULL;
-		}
-		list_move(&desc->desc_node, &midc->free_list);
-		midc->busy = false;
-	}
-	spin_lock_bh(&midc->lock);
-
-}
-/**
- * midc_scan_descriptors -		check the descriptors in channel
- *					mark completed when tx is completete
- * @mid: device
- * @midc: channel to scan
- *
- * Walk the descriptor chain for the device and process any entries
- * that are complete.
- */
-static void midc_scan_descriptors(struct middma_device *mid,
-				struct intel_mid_dma_chan *midc)
-{
-	struct intel_mid_dma_desc *desc = NULL, *_desc = NULL;
-
-	/*tx is complete*/
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		if (desc->status == DMA_IN_PROGRESS)
-			midc_descriptor_complete(midc, desc);
-	}
-	return;
-	}
-/**
- * midc_lli_fill_sg -		Helper function to convert
- *				SG list to Linked List Items.
- *@midc: Channel
- *@desc: DMA descriptor
- *@sglist: Pointer to SG list
- *@sglen: SG list length
- *@flags: DMA transaction flags
- *
- * Walk through the SG list and convert the SG list into Linked
- * List Items (LLI).
- */
-static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
-				struct intel_mid_dma_desc *desc,
-				struct scatterlist *sglist,
-				unsigned int sglen,
-				unsigned int flags)
-{
-	struct intel_mid_dma_slave *mids;
-	struct scatterlist  *sg;
-	dma_addr_t lli_next, sg_phy_addr;
-	struct intel_mid_dma_lli *lli_bloc_desc;
-	union intel_mid_dma_ctl_lo ctl_lo;
-	union intel_mid_dma_ctl_hi ctl_hi;
-	int i;
-
-	pr_debug("MDMA: Entered midc_lli_fill_sg\n");
-	mids = midc->mid_slave;
-
-	lli_bloc_desc = desc->lli;
-	lli_next = desc->lli_phys;
-
-	ctl_lo.ctl_lo = desc->ctl_lo;
-	ctl_hi.ctl_hi = desc->ctl_hi;
-	for_each_sg(sglist, sg, sglen, i) {
-		/*Populate CTL_LOW and LLI values*/
-		if (i != sglen - 1) {
-			lli_next = lli_next +
-				sizeof(struct intel_mid_dma_lli);
-		} else {
-		/*Check for circular list, otherwise terminate LLI to ZERO*/
-			if (flags & DMA_PREP_CIRCULAR_LIST) {
-				pr_debug("MDMA: LLI is configured in circular mode\n");
-				lli_next = desc->lli_phys;
-			} else {
-				lli_next = 0;
-				ctl_lo.ctlx.llp_dst_en = 0;
-				ctl_lo.ctlx.llp_src_en = 0;
-			}
-		}
-		/*Populate CTL_HI values*/
-		ctl_hi.ctlx.block_ts = get_block_ts(sg_dma_len(sg),
-							desc->width,
-							midc->dma->block_size);
-		/*Populate SAR and DAR values*/
-		sg_phy_addr = sg_dma_address(sg);
-		if (desc->dirn ==  DMA_MEM_TO_DEV) {
-			lli_bloc_desc->sar  = sg_phy_addr;
-			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
-			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
-			lli_bloc_desc->dar  = sg_phy_addr;
-		}
-		/*Copy values into block descriptor in system memroy*/
-		lli_bloc_desc->llp = lli_next;
-		lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo;
-		lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi;
-
-		lli_bloc_desc++;
-	}
-	/*Copy very first LLI values to descriptor*/
-	desc->ctl_lo = desc->lli->ctl_lo;
-	desc->ctl_hi = desc->lli->ctl_hi;
-	desc->sar = desc->lli->sar;
-	desc->dar = desc->lli->dar;
-
-	return 0;
-}
-/*****************************************************************************
-DMA engine callback Functions*/
-/**
- * intel_mid_dma_tx_submit -	callback to submit DMA transaction
- * @tx: dma engine descriptor
- *
- * Submit the DMA transaction for this descriptor, start if ch idle
- */
-static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct intel_mid_dma_desc	*desc = to_intel_mid_dma_desc(tx);
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(tx->chan);
-	dma_cookie_t		cookie;
-
-	spin_lock_bh(&midc->lock);
-	cookie = dma_cookie_assign(tx);
-
-	if (list_empty(&midc->active_list))
-		list_add_tail(&desc->desc_node, &midc->active_list);
-	else
-		list_add_tail(&desc->desc_node, &midc->queue);
-
-	midc_dostart(midc, desc);
-	spin_unlock_bh(&midc->lock);
-
-	return cookie;
-}
-
-/**
- * intel_mid_dma_issue_pending -	callback to issue pending txn
- * @chan: chan where pending trascation needs to be checked and submitted
- *
- * Call for scan to issue pending descriptors
- */
-static void intel_mid_dma_issue_pending(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-
-	spin_lock_bh(&midc->lock);
-	if (!list_empty(&midc->queue))
-		midc_scan_descriptors(to_middma_device(chan->device), midc);
-	spin_unlock_bh(&midc->lock);
-}
-
-/**
- * intel_mid_dma_tx_status -	Return status of txn
- * @chan: chan for where status needs to be checked
- * @cookie: cookie for txn
- * @txstate: DMA txn state
- *
- * Return status of DMA txn
- */
-static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
-						dma_cookie_t cookie,
-						struct dma_tx_state *txstate)
-{
-	struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
-	enum dma_status ret;
-
-	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_COMPLETE) {
-		spin_lock_bh(&midc->lock);
-		midc_scan_descriptors(to_middma_device(chan->device), midc);
-		spin_unlock_bh(&midc->lock);
-
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
-
-	return ret;
-}
-
-static int intel_mid_dma_config(struct dma_chan *chan,
-				struct dma_slave_config *slave)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct intel_mid_dma_slave *mid_slave;
-
-	BUG_ON(!midc);
-	BUG_ON(!slave);
-	pr_debug("MDMA: slave control called\n");
-
-	mid_slave = to_intel_mid_dma_slave(slave);
-
-	BUG_ON(!mid_slave);
-
-	midc->mid_slave = mid_slave;
-	return 0;
-}
-
-static int intel_mid_dma_terminate_all(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc, *_desc;
-	union intel_mid_dma_cfg_lo cfg_lo;
-
-	spin_lock_bh(&midc->lock);
-	if (midc->busy == false) {
-		spin_unlock_bh(&midc->lock);
-		return 0;
-	}
-	/*Suspend and disable the channel*/
-	cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW);
-	cfg_lo.cfgx.ch_susp = 1;
-	iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW);
-	iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-	midc->busy = false;
-	/* Disable interrupts */
-	disable_dma_interrupt(midc);
-	midc->descs_allocated = 0;
-
-	spin_unlock_bh(&midc->lock);
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		if (desc->lli != NULL) {
-			pci_pool_free(desc->lli_pool, desc->lli,
-						desc->lli_phys);
-			pci_pool_destroy(desc->lli_pool);
-			desc->lli = NULL;
-		}
-		list_move(&desc->desc_node, &midc->free_list);
-	}
-	return 0;
-}
-
-
-/**
- * intel_mid_dma_prep_memcpy -	Prep memcpy txn
- * @chan: chan for DMA transfer
- * @dest: destn address
- * @src: src address
- * @len: DMA transfer len
- * @flags: DMA flags
- *
- * Perform a DMA memcpy. Note we support slave periphral DMA transfers only
- * The periphral txn details should be filled in slave structure properly
- * Returns the descriptor for this txn
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
-			struct dma_chan *chan, dma_addr_t dest,
-			dma_addr_t src, size_t len, unsigned long flags)
-{
-	struct intel_mid_dma_chan *midc;
-	struct intel_mid_dma_desc *desc = NULL;
-	struct intel_mid_dma_slave *mids;
-	union intel_mid_dma_ctl_lo ctl_lo;
-	union intel_mid_dma_ctl_hi ctl_hi;
-	union intel_mid_dma_cfg_lo cfg_lo;
-	union intel_mid_dma_cfg_hi cfg_hi;
-	enum dma_slave_buswidth width;
-
-	pr_debug("MDMA: Prep for memcpy\n");
-	BUG_ON(!chan);
-	if (!len)
-		return NULL;
-
-	midc = to_intel_mid_dma_chan(chan);
-	BUG_ON(!midc);
-
-	mids = midc->mid_slave;
-	BUG_ON(!mids);
-
-	pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
-				midc->dma->pci_id, midc->ch_id, len);
-	pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n",
-			mids->cfg_mode, mids->dma_slave.direction,
-			mids->hs_mode, mids->dma_slave.src_addr_width);
-
-	/*calculate CFG_LO*/
-	if (mids->hs_mode == LNW_DMA_SW_HS) {
-		cfg_lo.cfg_lo = 0;
-		cfg_lo.cfgx.hs_sel_dst = 1;
-		cfg_lo.cfgx.hs_sel_src = 1;
-	} else if (mids->hs_mode == LNW_DMA_HW_HS)
-		cfg_lo.cfg_lo = 0x00000;
-
-	/*calculate CFG_HI*/
-	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-		/*SW HS only*/
-		cfg_hi.cfg_hi = 0;
-	} else {
-		cfg_hi.cfg_hi = 0;
-		if (midc->dma->pimr_mask) {
-			cfg_hi.cfgx.protctl = 0x0; /*default value*/
-			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-				cfg_hi.cfgx.src_per = 0;
-				if (mids->device_instance == 0)
-					cfg_hi.cfgx.dst_per = 3;
-				if (mids->device_instance == 1)
-					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-				if (mids->device_instance == 0)
-					cfg_hi.cfgx.src_per = 2;
-				if (mids->device_instance == 1)
-					cfg_hi.cfgx.src_per = 0;
-				cfg_hi.cfgx.dst_per = 0;
-			}
-		} else {
-			cfg_hi.cfgx.protctl = 0x1; /*default value*/
-			cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per =
-					midc->ch_id - midc->dma->chan_base;
-		}
-	}
-
-	/*calculate CTL_HI*/
-	ctl_hi.ctlx.reser = 0;
-	ctl_hi.ctlx.done  = 0;
-	width = mids->dma_slave.src_addr_width;
-
-	ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
-	pr_debug("MDMA:calc len %d for block size %d\n",
-				ctl_hi.ctlx.block_ts, midc->dma->block_size);
-	/*calculate CTL_LO*/
-	ctl_lo.ctl_lo = 0;
-	ctl_lo.ctlx.int_en = 1;
-	ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
-	ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
-
-	/*
-	 * Here we need some translation from "enum dma_slave_buswidth"
-	 * to the format for our dma controller
-	 *		standard	intel_mid_dmac's format
-	 *		 1 Byte			0b000
-	 *		 2 Bytes		0b001
-	 *		 4 Bytes		0b010
-	 */
-	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2;
-	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2;
-
-	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-		ctl_lo.ctlx.tt_fc = 0;
-		ctl_lo.ctlx.sinc = 0;
-		ctl_lo.ctlx.dinc = 0;
-	} else {
-		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-			ctl_lo.ctlx.sinc = 0;
-			ctl_lo.ctlx.dinc = 2;
-			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-			ctl_lo.ctlx.sinc = 2;
-			ctl_lo.ctlx.dinc = 0;
-			ctl_lo.ctlx.tt_fc = 2;
-		}
-	}
-
-	pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n",
-		ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi);
-
-	enable_dma_interrupt(midc);
-
-	desc = midc_desc_get(midc);
-	if (desc == NULL)
-		goto err_desc_get;
-	desc->sar = src;
-	desc->dar = dest ;
-	desc->len = len;
-	desc->cfg_hi = cfg_hi.cfg_hi;
-	desc->cfg_lo = cfg_lo.cfg_lo;
-	desc->ctl_lo = ctl_lo.ctl_lo;
-	desc->ctl_hi = ctl_hi.ctl_hi;
-	desc->width = width;
-	desc->dirn = mids->dma_slave.direction;
-	desc->lli_phys = 0;
-	desc->lli = NULL;
-	desc->lli_pool = NULL;
-	return &desc->txd;
-
-err_desc_get:
-	pr_err("ERR_MDMA: Failed to get desc\n");
-	midc_desc_put(midc, desc);
-	return NULL;
-}
-/**
- * intel_mid_dma_prep_slave_sg -	Prep slave sg txn
- * @chan: chan for DMA transfer
- * @sgl: scatter gather list
- * @sg_len: length of sg txn
- * @direction: DMA transfer dirtn
- * @flags: DMA flags
- * @context: transfer context (ignored)
- *
- * Prepares LLI based periphral transfer
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
-			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags, void *context)
-{
-	struct intel_mid_dma_chan *midc = NULL;
-	struct intel_mid_dma_slave *mids = NULL;
-	struct intel_mid_dma_desc *desc = NULL;
-	struct dma_async_tx_descriptor *txd = NULL;
-	union intel_mid_dma_ctl_lo ctl_lo;
-
-	pr_debug("MDMA: Prep for slave SG\n");
-
-	if (!sg_len) {
-		pr_err("MDMA: Invalid SG length\n");
-		return NULL;
-	}
-	midc = to_intel_mid_dma_chan(chan);
-	BUG_ON(!midc);
-
-	mids = midc->mid_slave;
-	BUG_ON(!mids);
-
-	if (!midc->dma->pimr_mask) {
-		/* We can still handle sg list with only one item */
-		if (sg_len == 1) {
-			txd = intel_mid_dma_prep_memcpy(chan,
-						mids->dma_slave.dst_addr,
-						mids->dma_slave.src_addr,
-						sg_dma_len(sgl),
-						flags);
-			return txd;
-		} else {
-			pr_warn("MDMA: SG list is not supported by this controller\n");
-			return  NULL;
-		}
-	}
-
-	pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
-			sg_len, direction, flags);
-
-	txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sg_dma_len(sgl), flags);
-	if (NULL == txd) {
-		pr_err("MDMA: Prep memcpy failed\n");
-		return NULL;
-	}
-
-	desc = to_intel_mid_dma_desc(txd);
-	desc->dirn = direction;
-	ctl_lo.ctl_lo = desc->ctl_lo;
-	ctl_lo.ctlx.llp_dst_en = 1;
-	ctl_lo.ctlx.llp_src_en = 1;
-	desc->ctl_lo = ctl_lo.ctl_lo;
-	desc->lli_length = sg_len;
-	desc->current_lli = 0;
-	/* DMA coherent memory pool for LLI descriptors*/
-	desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool",
-				midc->dma->pdev,
-				(sizeof(struct intel_mid_dma_lli)*sg_len),
-				32, 0);
-	if (NULL == desc->lli_pool) {
-		pr_err("MID_DMA:LLI pool create failed\n");
-		return NULL;
-	}
-
-	desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys);
-	if (!desc->lli) {
-		pr_err("MID_DMA: LLI alloc failed\n");
-		pci_pool_destroy(desc->lli_pool);
-		return NULL;
-	}
-
-	midc_lli_fill_sg(midc, desc, sgl, sg_len, flags);
-	if (flags & DMA_PREP_INTERRUPT) {
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				midc->dma_base + MASK_BLOCK);
-		pr_debug("MDMA:Enabled Block interrupt\n");
-	}
-	return &desc->txd;
-}
-
-/**
- * intel_mid_dma_free_chan_resources -	Frees dma resources
- * @chan: chan requiring attention
- *
- * Frees the allocated resources on this DMA chan
- */
-static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc, *_desc;
-
-	if (true == midc->busy) {
-		/*trying to free ch in use!!!!!*/
-		pr_err("ERR_MDMA: trying to free ch in use\n");
-	}
-	spin_lock_bh(&midc->lock);
-	midc->descs_allocated = 0;
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	spin_unlock_bh(&midc->lock);
-	midc->in_use = false;
-	midc->busy = false;
-	/* Disable CH interrupts */
-	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
-	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
-	pm_runtime_put(&mid->pdev->dev);
-}
-
-/**
- * intel_mid_dma_alloc_chan_resources -	Allocate dma resources
- * @chan: chan requiring attention
- *
- * Allocates DMA resources on this chan
- * Return the descriptors allocated
- */
-static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc;
-	dma_addr_t		phys;
-	int	i = 0;
-
-	pm_runtime_get_sync(&mid->pdev->dev);
-
-	if (mid->state == SUSPENDED) {
-		if (dma_resume(&mid->pdev->dev)) {
-			pr_err("ERR_MDMA: resume failed");
-			return -EFAULT;
-		}
-	}
-
-	/* ASSERT:  channel is idle */
-	if (test_ch_en(mid->dma_base, midc->ch_id)) {
-		/*ch is not idle*/
-		pr_err("ERR_MDMA: ch not idle\n");
-		pm_runtime_put(&mid->pdev->dev);
-		return -EIO;
-	}
-	dma_cookie_init(chan);
-
-	spin_lock_bh(&midc->lock);
-	while (midc->descs_allocated < DESCS_PER_CHANNEL) {
-		spin_unlock_bh(&midc->lock);
-		desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys);
-		if (!desc) {
-			pr_err("ERR_MDMA: desc failed\n");
-			pm_runtime_put(&mid->pdev->dev);
-			return -ENOMEM;
-			/*check*/
-		}
-		dma_async_tx_descriptor_init(&desc->txd, chan);
-		desc->txd.tx_submit = intel_mid_dma_tx_submit;
-		desc->txd.flags = DMA_CTRL_ACK;
-		desc->txd.phys = phys;
-		spin_lock_bh(&midc->lock);
-		i = ++midc->descs_allocated;
-		list_add_tail(&desc->desc_node, &midc->free_list);
-	}
-	spin_unlock_bh(&midc->lock);
-	midc->in_use = true;
-	midc->busy = false;
-	pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i);
-	return i;
-}
-
-/**
- * midc_handle_error -	Handle DMA txn error
- * @mid: controller where error occurred
- * @midc: chan where error occurred
- *
- * Scan the descriptor for error
- */
-static void midc_handle_error(struct middma_device *mid,
-		struct intel_mid_dma_chan *midc)
-{
-	midc_scan_descriptors(mid, midc);
-}
-
-/**
- * dma_tasklet -	DMA interrupt tasklet
- * @data: tasklet arg (the controller structure)
- *
- * Scan the controller for interrupts for completion/error
- * Clear the interrupt and call for handling completion/error
- */
-static void dma_tasklet(unsigned long data)
-{
-	struct middma_device *mid = NULL;
-	struct intel_mid_dma_chan *midc = NULL;
-	u32 status, raw_tfr, raw_block;
-	int i;
-
-	mid = (struct middma_device *)data;
-	if (mid == NULL) {
-		pr_err("ERR_MDMA: tasklet Null param\n");
-		return;
-	}
-	pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id);
-	raw_tfr = ioread32(mid->dma_base + RAW_TFR);
-	raw_block = ioread32(mid->dma_base + RAW_BLOCK);
-	status = raw_tfr | raw_block;
-	status &= mid->intr_mask;
-	while (status) {
-		/*txn interrupt*/
-		i = get_ch_index(&status, mid->chan_base);
-		if (i < 0) {
-			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-			return;
-		}
-		midc = &mid->ch[i];
-		if (midc == NULL) {
-			pr_err("ERR_MDMA:Null param midc\n");
-			return;
-		}
-		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
-				status, midc->ch_id, i);
-		midc->raw_tfr = raw_tfr;
-		midc->raw_block = raw_block;
-		spin_lock_bh(&midc->lock);
-		/*clearing this interrupts first*/
-		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR);
-		if (raw_block) {
-			iowrite32((1 << midc->ch_id),
-				mid->dma_base + CLEAR_BLOCK);
-		}
-		midc_scan_descriptors(mid, midc);
-		pr_debug("MDMA:Scan of desc... complete, unmasking\n");
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_TFR);
-		if (raw_block) {
-			iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_BLOCK);
-		}
-		spin_unlock_bh(&midc->lock);
-	}
-
-	status = ioread32(mid->dma_base + RAW_ERR);
-	status &= mid->intr_mask;
-	while (status) {
-		/*err interrupt*/
-		i = get_ch_index(&status, mid->chan_base);
-		if (i < 0) {
-			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-			return;
-		}
-		midc = &mid->ch[i];
-		if (midc == NULL) {
-			pr_err("ERR_MDMA:Null param midc\n");
-			return;
-		}
-		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
-				status, midc->ch_id, i);
-
-		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR);
-		spin_lock_bh(&midc->lock);
-		midc_handle_error(mid, midc);
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_ERR);
-		spin_unlock_bh(&midc->lock);
-	}
-	pr_debug("MDMA:Exiting takslet...\n");
-	return;
-}
-
-static void dma_tasklet1(unsigned long data)
-{
-	pr_debug("MDMA:in takslet1...\n");
-	return dma_tasklet(data);
-}
-
-static void dma_tasklet2(unsigned long data)
-{
-	pr_debug("MDMA:in takslet2...\n");
-	return dma_tasklet(data);
-}
-
-/**
- * intel_mid_dma_interrupt -	DMA ISR
- * @irq: IRQ where interrupt occurred
- * @data: ISR cllback data (the controller structure)
- *
- * See if this is our interrupt if so then schedule the tasklet
- * otherwise ignore
- */
-static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
-{
-	struct middma_device *mid = data;
-	u32 tfr_status, err_status;
-	int call_tasklet = 0;
-
-	tfr_status = ioread32(mid->dma_base + RAW_TFR);
-	err_status = ioread32(mid->dma_base + RAW_ERR);
-	if (!tfr_status && !err_status)
-		return IRQ_NONE;
-
-	/*DMA Interrupt*/
-	pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
-	pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
-	tfr_status &= mid->intr_mask;
-	if (tfr_status) {
-		/*need to disable intr*/
-		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR);
-		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK);
-		pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
-		call_tasklet = 1;
-	}
-	err_status &= mid->intr_mask;
-	if (err_status) {
-		iowrite32((err_status << INT_MASK_WE),
-			  mid->dma_base + MASK_ERR);
-		call_tasklet = 1;
-	}
-	if (call_tasklet)
-		tasklet_schedule(&mid->tasklet);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data)
-{
-	return intel_mid_dma_interrupt(irq, data);
-}
-
-static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data)
-{
-	return intel_mid_dma_interrupt(irq, data);
-}
-
-/**
- * mid_setup_dma -	Setup the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Initialize the DMA controller, channels, registers with DMA engine,
- * ISR. Initialize DMA controller channels.
- */
-static int mid_setup_dma(struct pci_dev *pdev)
-{
-	struct middma_device *dma = pci_get_drvdata(pdev);
-	int err, i;
-
-	/* DMA coherent memory pool for DMA descriptor allocations */
-	dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev,
-					sizeof(struct intel_mid_dma_desc),
-					32, 0);
-	if (NULL == dma->dma_pool) {
-		pr_err("ERR_MDMA:pci_pool_create failed\n");
-		err = -ENOMEM;
-		goto err_dma_pool;
-	}
-
-	INIT_LIST_HEAD(&dma->common.channels);
-	dma->pci_id = pdev->device;
-	if (dma->pimr_mask) {
-		dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE,
-					LNW_PERIPHRAL_MASK_SIZE);
-		if (dma->mask_reg == NULL) {
-			pr_err("ERR_MDMA:Can't map periphral intr space !!\n");
-			err = -ENOMEM;
-			goto err_ioremap;
-		}
-	} else
-		dma->mask_reg = NULL;
-
-	pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan);
-	/*init CH structures*/
-	dma->intr_mask = 0;
-	dma->state = RUNNING;
-	for (i = 0; i < dma->max_chan; i++) {
-		struct intel_mid_dma_chan *midch = &dma->ch[i];
-
-		midch->chan.device = &dma->common;
-		dma_cookie_init(&midch->chan);
-		midch->ch_id = dma->chan_base + i;
-		pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
-
-		midch->dma_base = dma->dma_base;
-		midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id;
-		midch->dma = dma;
-		dma->intr_mask |= 1 << (dma->chan_base + i);
-		spin_lock_init(&midch->lock);
-
-		INIT_LIST_HEAD(&midch->active_list);
-		INIT_LIST_HEAD(&midch->queue);
-		INIT_LIST_HEAD(&midch->free_list);
-		/*mask interrupts*/
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_BLOCK);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_SRC_TRAN);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_DST_TRAN);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_ERR);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_TFR);
-
-		disable_dma_interrupt(midch);
-		list_add_tail(&midch->chan.device_node, &dma->common.channels);
-	}
-	pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask);
-
-	/*init dma structure*/
-	dma_cap_zero(dma->common.cap_mask);
-	dma_cap_set(DMA_MEMCPY, dma->common.cap_mask);
-	dma_cap_set(DMA_SLAVE, dma->common.cap_mask);
-	dma_cap_set(DMA_PRIVATE, dma->common.cap_mask);
-	dma->common.dev = &pdev->dev;
-
-	dma->common.device_alloc_chan_resources =
-					intel_mid_dma_alloc_chan_resources;
-	dma->common.device_free_chan_resources =
-					intel_mid_dma_free_chan_resources;
-
-	dma->common.device_tx_status = intel_mid_dma_tx_status;
-	dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy;
-	dma->common.device_issue_pending = intel_mid_dma_issue_pending;
-	dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg;
-	dma->common.device_config = intel_mid_dma_config;
-	dma->common.device_terminate_all = intel_mid_dma_terminate_all;
-
-	/*enable dma cntrl*/
-	iowrite32(REG_BIT0, dma->dma_base + DMA_CFG);
-
-	/*register irq */
-	if (dma->pimr_mask) {
-		pr_debug("MDMA:Requesting irq shared for DMAC1\n");
-		err = request_irq(pdev->irq, intel_mid_dma_interrupt1,
-			IRQF_SHARED, "INTEL_MID_DMAC1", dma);
-		if (0 != err)
-			goto err_irq;
-	} else {
-		dma->intr_mask = 0x03;
-		pr_debug("MDMA:Requesting irq for DMAC2\n");
-		err = request_irq(pdev->irq, intel_mid_dma_interrupt2,
-			IRQF_SHARED, "INTEL_MID_DMAC2", dma);
-		if (0 != err)
-			goto err_irq;
-	}
-	/*register device w/ engine*/
-	err = dma_async_device_register(&dma->common);
-	if (0 != err) {
-		pr_err("ERR_MDMA:device_register failed: %d\n", err);
-		goto err_engine;
-	}
-	if (dma->pimr_mask) {
-		pr_debug("setting up tasklet1 for DMAC1\n");
-		tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma);
-	} else {
-		pr_debug("setting up tasklet2 for DMAC2\n");
-		tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma);
-	}
-	return 0;
-
-err_engine:
-	free_irq(pdev->irq, dma);
-err_irq:
-	if (dma->mask_reg)
-		iounmap(dma->mask_reg);
-err_ioremap:
-	pci_pool_destroy(dma->dma_pool);
-err_dma_pool:
-	pr_err("ERR_MDMA:setup_dma failed: %d\n", err);
-	return err;
-
-}
-
-/**
- * middma_shutdown -	Shutdown the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Called by remove
- * Unregister DMa controller, clear all structures and free interrupt
- */
-static void middma_shutdown(struct pci_dev *pdev)
-{
-	struct middma_device *device = pci_get_drvdata(pdev);
-
-	dma_async_device_unregister(&device->common);
-	pci_pool_destroy(device->dma_pool);
-	if (device->mask_reg)
-		iounmap(device->mask_reg);
-	if (device->dma_base)
-		iounmap(device->dma_base);
-	free_irq(pdev->irq, device);
-	return;
-}
-
-/**
- * intel_mid_dma_probe -	PCI Probe
- * @pdev: Controller PCI device structure
- * @id: pci device id structure
- *
- * Initialize the PCI device, map BARs, query driver data.
- * Call setup_dma to complete contoller and chan initilzation
- */
-static int intel_mid_dma_probe(struct pci_dev *pdev,
-					const struct pci_device_id *id)
-{
-	struct middma_device *device;
-	u32 base_addr, bar_size;
-	struct intel_mid_dma_probe_info *info;
-	int err;
-
-	pr_debug("MDMA: probe for %x\n", pdev->device);
-	info = (void *)id->driver_data;
-	pr_debug("MDMA: CH %d, base %d, block len %d, Periphral mask %x\n",
-				info->max_chan, info->ch_base,
-				info->block_size, info->pimr_mask);
-
-	err = pci_enable_device(pdev);
-	if (err)
-		goto err_enable_device;
-
-	err = pci_request_regions(pdev, "intel_mid_dmac");
-	if (err)
-		goto err_request_regions;
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (err)
-		goto err_set_dma_mask;
-
-	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (err)
-		goto err_set_dma_mask;
-
-	device = kzalloc(sizeof(*device), GFP_KERNEL);
-	if (!device) {
-		pr_err("ERR_MDMA:kzalloc failed probe\n");
-		err = -ENOMEM;
-		goto err_kzalloc;
-	}
-	device->pdev = pci_dev_get(pdev);
-
-	base_addr = pci_resource_start(pdev, 0);
-	bar_size  = pci_resource_len(pdev, 0);
-	device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE);
-	if (!device->dma_base) {
-		pr_err("ERR_MDMA:ioremap failed\n");
-		err = -ENOMEM;
-		goto err_ioremap;
-	}
-	pci_set_drvdata(pdev, device);
-	pci_set_master(pdev);
-	device->max_chan = info->max_chan;
-	device->chan_base = info->ch_base;
-	device->block_size = info->block_size;
-	device->pimr_mask = info->pimr_mask;
-
-	err = mid_setup_dma(pdev);
-	if (err)
-		goto err_dma;
-
-	pm_runtime_put_noidle(&pdev->dev);
-	pm_runtime_allow(&pdev->dev);
-	return 0;
-
-err_dma:
-	iounmap(device->dma_base);
-err_ioremap:
-	pci_dev_put(pdev);
-	kfree(device);
-err_kzalloc:
-err_set_dma_mask:
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
-	pr_err("ERR_MDMA:Probe failed %d\n", err);
-	return err;
-}
-
-/**
- * intel_mid_dma_remove -	PCI remove
- * @pdev: Controller PCI device structure
- *
- * Free up all resources and data
- * Call shutdown_dma to complete contoller and chan cleanup
- */
-static void intel_mid_dma_remove(struct pci_dev *pdev)
-{
-	struct middma_device *device = pci_get_drvdata(pdev);
-
-	pm_runtime_get_noresume(&pdev->dev);
-	pm_runtime_forbid(&pdev->dev);
-	middma_shutdown(pdev);
-	pci_dev_put(pdev);
-	kfree(device);
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-}
-
-/* Power Management */
-/*
-* dma_suspend - PCI suspend function
-*
-* @pci: PCI device structure
-* @state: PM message
-*
-* This function is called by OS when a power event occurs
-*/
-static int dma_suspend(struct device *dev)
-{
-	struct pci_dev *pci = to_pci_dev(dev);
-	int i;
-	struct middma_device *device = pci_get_drvdata(pci);
-	pr_debug("MDMA: dma_suspend called\n");
-
-	for (i = 0; i < device->max_chan; i++) {
-		if (device->ch[i].in_use)
-			return -EAGAIN;
-	}
-	dmac1_mask_periphral_intr(device);
-	device->state = SUSPENDED;
-	pci_save_state(pci);
-	pci_disable_device(pci);
-	pci_set_power_state(pci, PCI_D3hot);
-	return 0;
-}
-
-/**
-* dma_resume - PCI resume function
-*
-* @pci:	PCI device structure
-*
-* This function is called by OS when a power event occurs
-*/
-int dma_resume(struct device *dev)
-{
-	struct pci_dev *pci = to_pci_dev(dev);
-	int ret;
-	struct middma_device *device = pci_get_drvdata(pci);
-
-	pr_debug("MDMA: dma_resume called\n");
-	pci_set_power_state(pci, PCI_D0);
-	pci_restore_state(pci);
-	ret = pci_enable_device(pci);
-	if (ret) {
-		pr_err("MDMA: device can't be enabled for %x\n", pci->device);
-		return ret;
-	}
-	device->state = RUNNING;
-	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	return 0;
-}
-
-static int dma_runtime_suspend(struct device *dev)
-{
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pci_dev);
-
-	device->state = SUSPENDED;
-	return 0;
-}
-
-static int dma_runtime_resume(struct device *dev)
-{
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pci_dev);
-
-	device->state = RUNNING;
-	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	return 0;
-}
-
-static int dma_runtime_idle(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < device->max_chan; i++) {
-		if (device->ch[i].in_use)
-			return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/******************************************************************************
-* PCI stuff
-*/
-static struct pci_device_id intel_mid_dma_ids[] = {
-	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID),	INFO(2, 6, 4095, 0x200020)},
-	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID),	INFO(2, 0, 2047, 0)},
-	{ PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID),	INFO(2, 0, 2047, 0)},
-	{ PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID),	INFO(4, 0, 4095, 0x400040)},
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids);
-
-static const struct dev_pm_ops intel_mid_dma_pm = {
-	.runtime_suspend = dma_runtime_suspend,
-	.runtime_resume = dma_runtime_resume,
-	.runtime_idle = dma_runtime_idle,
-	.suspend = dma_suspend,
-	.resume = dma_resume,
-};
-
-static struct pci_driver intel_mid_dma_pci_driver = {
-	.name		=	"Intel MID DMA",
-	.id_table	=	intel_mid_dma_ids,
-	.probe		=	intel_mid_dma_probe,
-	.remove		=	intel_mid_dma_remove,
-#ifdef CONFIG_PM
-	.driver = {
-		.pm = &intel_mid_dma_pm,
-	},
-#endif
-};
-
-static int __init intel_mid_dma_init(void)
-{
-	pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n",
-			INTEL_MID_DMA_DRIVER_VERSION);
-	return pci_register_driver(&intel_mid_dma_pci_driver);
-}
-fs_initcall(intel_mid_dma_init);
-
-static void __exit intel_mid_dma_exit(void)
-{
-	pci_unregister_driver(&intel_mid_dma_pci_driver);
-}
-module_exit(intel_mid_dma_exit);
-
-MODULE_AUTHOR("Vinod Koul <vinod.koul@intel.com>");
-MODULE_DESCRIPTION("Intel (R) MID DMAC Driver");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION);
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
deleted file mode 100644
index 17b4219..0000000
--- a/drivers/dma/intel_mid_dma_regs.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- *  intel_mid_dma_regs.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMAC_REGS_H__
-#define __INTEL_MID_DMAC_REGS_H__
-
-#include <linux/dmaengine.h>
-#include <linux/dmapool.h>
-#include <linux/pci_ids.h>
-
-#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0"
-
-#define	REG_BIT0		0x00000001
-#define	REG_BIT8		0x00000100
-#define INT_MASK_WE		0x8
-#define CLEAR_DONE		0xFFFFEFFF
-#define UNMASK_INTR_REG(chan_num) \
-	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num)
-
-#define ENABLE_CHANNEL(chan_num) \
-	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-
-#define DISABLE_CHANNEL(chan_num) \
-	(REG_BIT8 << chan_num)
-
-#define DESCS_PER_CHANNEL	16
-/*DMA Registers*/
-/*registers associated with channel programming*/
-#define DMA_REG_SIZE		0x400
-#define DMA_CH_SIZE		0x58
-
-/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/
-#define SAR			0x00 /* Source Address Register*/
-#define DAR			0x08 /* Destination Address Register*/
-#define LLP			0x10 /* Linked List Pointer Register*/
-#define CTL_LOW			0x18 /* Control Register*/
-#define CTL_HIGH		0x1C /* Control Register*/
-#define CFG_LOW			0x40 /* Configuration Register Low*/
-#define CFG_HIGH		0x44 /* Configuration Register high*/
-
-#define STATUS_TFR		0x2E8
-#define STATUS_BLOCK		0x2F0
-#define STATUS_ERR		0x308
-
-#define RAW_TFR			0x2C0
-#define RAW_BLOCK		0x2C8
-#define RAW_ERR			0x2E0
-
-#define MASK_TFR		0x310
-#define MASK_BLOCK		0x318
-#define MASK_SRC_TRAN		0x320
-#define MASK_DST_TRAN		0x328
-#define MASK_ERR		0x330
-
-#define CLEAR_TFR		0x338
-#define CLEAR_BLOCK		0x340
-#define CLEAR_SRC_TRAN		0x348
-#define CLEAR_DST_TRAN		0x350
-#define CLEAR_ERR		0x358
-
-#define INTR_STATUS		0x360
-#define DMA_CFG			0x398
-#define DMA_CHAN_EN		0x3A0
-
-/*DMA channel control registers*/
-union intel_mid_dma_ctl_lo {
-	struct {
-		u32	int_en:1;	/*enable or disable interrupts*/
-					/*should be 0*/
-		u32	dst_tr_width:3;	/*destination transfer width*/
-					/*usually 32 bits = 010*/
-		u32	src_tr_width:3; /*source transfer width*/
-					/*usually 32 bits = 010*/
-		u32	dinc:2;		/*destination address inc/dec*/
-					/*For mem:INC=00, Periphral NoINC=11*/
-		u32	sinc:2;		/*source address inc or dec, as above*/
-		u32	dst_msize:3;	/*destination burst transaction length*/
-					/*always = 16 ie 011*/
-		u32	src_msize:3;	/*source burst transaction length*/
-					/*always = 16 ie 011*/
-		u32	reser1:3;
-		u32	tt_fc:3;	/*transfer type and flow controller*/
-					/*M-M = 000
-					  P-M = 010
-					  M-P = 001*/
-		u32	dms:2;		/*destination master select = 0*/
-		u32	sms:2;		/*source master select = 0*/
-		u32	llp_dst_en:1;	/*enable/disable destination LLP = 0*/
-		u32	llp_src_en:1;	/*enable/disable source LLP = 0*/
-		u32	reser2:3;
-	} ctlx;
-	u32	ctl_lo;
-};
-
-union intel_mid_dma_ctl_hi {
-	struct {
-		u32	block_ts:12;	/*block transfer size*/
-		u32	done:1;		/*Done - updated by DMAC*/
-		u32	reser:19;	/*configured by DMAC*/
-	} ctlx;
-	u32	ctl_hi;
-
-};
-
-/*DMA channel configuration registers*/
-union intel_mid_dma_cfg_lo {
-	struct {
-		u32	reser1:5;
-		u32	ch_prior:3;	/*channel priority = 0*/
-		u32	ch_susp:1;	/*channel suspend = 0*/
-		u32	fifo_empty:1;	/*FIFO empty or not R bit = 0*/
-		u32	hs_sel_dst:1;	/*select HW/SW destn handshaking*/
-					/*HW = 0, SW = 1*/
-		u32	hs_sel_src:1;	/*select HW/SW src handshaking*/
-		u32	reser2:6;
-		u32	dst_hs_pol:1;	/*dest HS interface polarity*/
-		u32	src_hs_pol:1;	/*src HS interface polarity*/
-		u32	max_abrst:10;	/*max AMBA burst len = 0 (no sw limit*/
-		u32	reload_src:1;	/*auto reload src addr =1 if src is P*/
-		u32	reload_dst:1;	/*AR destn addr =1 if dstn is P*/
-	} cfgx;
-	u32	cfg_lo;
-};
-
-union intel_mid_dma_cfg_hi {
-	struct {
-		u32	fcmode:1;	/*flow control mode = 1*/
-		u32	fifo_mode:1;	/*FIFO mode select = 1*/
-		u32	protctl:3;	/*protection control = 0*/
-		u32	rsvd:2;
-		u32	src_per:4;	/*src hw HS interface*/
-		u32	dst_per:4;	/*dstn hw HS interface*/
-		u32	reser2:17;
-	} cfgx;
-	u32	cfg_hi;
-};
-
-
-/**
- * struct intel_mid_dma_chan - internal mid representation of a DMA channel
- * @chan: dma_chan strcture represetation for mid chan
- * @ch_regs: MMIO register space pointer to channel register
- * @dma_base: MMIO register space DMA engine base pointer
- * @ch_id: DMA channel id
- * @lock: channel spinlock
- * @active_list: current active descriptors
- * @queue: current queued up descriptors
- * @free_list: current free descriptors
- * @slave: dma slave structure
- * @descs_allocated: total number of descriptors allocated
- * @dma: dma device structure pointer
- * @busy: bool representing if ch is busy (active txn) or not
- * @in_use: bool representing if ch is in use or not
- * @raw_tfr: raw trf interrupt received
- * @raw_block: raw block interrupt received
- */
-struct intel_mid_dma_chan {
-	struct dma_chan		chan;
-	void __iomem		*ch_regs;
-	void __iomem		*dma_base;
-	int			ch_id;
-	spinlock_t		lock;
-	struct list_head	active_list;
-	struct list_head	queue;
-	struct list_head	free_list;
-	unsigned int		descs_allocated;
-	struct middma_device	*dma;
-	bool			busy;
-	bool			in_use;
-	u32			raw_tfr;
-	u32			raw_block;
-	struct intel_mid_dma_slave *mid_slave;
-};
-
-static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
-						struct dma_chan *chan)
-{
-	return container_of(chan, struct intel_mid_dma_chan, chan);
-}
-
-enum intel_mid_dma_state {
-	RUNNING = 0,
-	SUSPENDED,
-};
-/**
- * struct middma_device - internal representation of a DMA device
- * @pdev: PCI device
- * @dma_base: MMIO register space pointer of DMA
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @tasklet: dma tasklet for processing interrupts
- * @ch: per channel data
- * @pci_id: DMA device PCI ID
- * @intr_mask: Interrupt mask to be used
- * @mask_reg: MMIO register for periphral mask
- * @chan_base: Base ch index (read from driver data)
- * @max_chan: max number of chs supported (from drv_data)
- * @block_size: Block size of DMA transfer supported (from drv_data)
- * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data)
- * @state: dma PM device state
- */
-struct middma_device {
-	struct pci_dev		*pdev;
-	void __iomem		*dma_base;
-	struct pci_pool		*dma_pool;
-	struct dma_device	common;
-	struct tasklet_struct   tasklet;
-	struct intel_mid_dma_chan ch[MAX_CHAN];
-	unsigned int		pci_id;
-	unsigned int		intr_mask;
-	void __iomem		*mask_reg;
-	int			chan_base;
-	int			max_chan;
-	int			block_size;
-	unsigned int		pimr_mask;
-	enum intel_mid_dma_state state;
-};
-
-static inline struct middma_device *to_middma_device(struct dma_device *common)
-{
-	return container_of(common, struct middma_device, common);
-}
-
-struct intel_mid_dma_desc {
-	void __iomem			*block; /*ch ptr*/
-	struct list_head		desc_node;
-	struct dma_async_tx_descriptor	txd;
-	size_t				len;
-	dma_addr_t			sar;
-	dma_addr_t			dar;
-	u32				cfg_hi;
-	u32				cfg_lo;
-	u32				ctl_lo;
-	u32				ctl_hi;
-	struct pci_pool			*lli_pool;
-	struct intel_mid_dma_lli	*lli;
-	dma_addr_t			lli_phys;
-	unsigned int			lli_length;
-	unsigned int			current_lli;
-	dma_addr_t			next;
-	enum dma_transfer_direction		dirn;
-	enum dma_status			status;
-	enum dma_slave_buswidth		width; /*width of DMA txn*/
-	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
-
-};
-
-struct intel_mid_dma_lli {
-	dma_addr_t			sar;
-	dma_addr_t			dar;
-	dma_addr_t			llp;
-	u32				ctl_lo;
-	u32				ctl_hi;
-} __attribute__ ((packed));
-
-static inline int test_ch_en(void __iomem *dma, u32 ch_no)
-{
-	u32 en_reg = ioread32(dma + DMA_CHAN_EN);
-	return (en_reg >> ch_no) & 0x1;
-}
-
-static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
-		(struct dma_async_tx_descriptor *txd)
-{
-	return container_of(txd, struct intel_mid_dma_desc, txd);
-}
-
-static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
-		(struct dma_slave_config *slave)
-{
-	return container_of(slave, struct intel_mid_dma_slave, dma_slave);
-}
-
-
-int dma_resume(struct device *dev);
-
-#endif /*__INTEL_MID_DMAC_REGS_H__*/
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5907c17..92772ff 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -20,8 +20,7 @@
  */
 static atomic_t drv_instances = ATOMIC_INIT(0);
 
-/* Per-node driver instances */
-static struct mem_ctl_info **mcis;
+/* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
 /*
@@ -903,9 +902,17 @@
 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
 static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 {
-	u64 addr;
+	u16 mce_nid = amd_get_nb_id(m->extcpu);
+	struct mem_ctl_info *mci;
 	u8 start_bit = 1;
 	u8 end_bit   = 47;
+	u64 addr;
+
+	mci = edac_mc_find(mce_nid);
+	if (!mci)
+		return 0;
+
+	pvt = mci->pvt_info;
 
 	if (pvt->fam == 0xf) {
 		start_bit = 3;
@@ -918,17 +925,13 @@
 	 * Erratum 637 workaround
 	 */
 	if (pvt->fam == 0x15) {
-		struct amd64_pvt *pvt;
 		u64 cc6_base, tmp_addr;
 		u32 tmp;
-		u16 mce_nid;
 		u8 intlv_en;
 
 		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
 			return addr;
 
-		mce_nid	= amd_get_nb_id(m->extcpu);
-		pvt	= mcis[mce_nid]->pvt_info;
 
 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
 		intlv_en = tmp >> 21 & 0x7;
@@ -1511,7 +1514,7 @@
 	int cs_found = -EINVAL;
 	int csrow;
 
-	mci = mcis[nid];
+	mci = edac_mc_find(nid);
 	if (!mci)
 		return cs_found;
 
@@ -2663,34 +2666,6 @@
 	return true;
 }
 
-static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
-{
-	struct amd64_pvt *pvt = mci->pvt_info;
-	int rc;
-
-	rc = amd64_create_sysfs_dbg_files(mci);
-	if (rc < 0)
-		return rc;
-
-	if (pvt->fam >= 0x10) {
-		rc = amd64_create_sysfs_inject_files(mci);
-		if (rc < 0)
-			return rc;
-	}
-
-	return 0;
-}
-
-static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
-{
-	struct amd64_pvt *pvt = mci->pvt_info;
-
-	amd64_remove_sysfs_dbg_files(mci);
-
-	if (pvt->fam >= 0x10)
-		amd64_remove_sysfs_inject_files(mci);
-}
-
 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
 				 struct amd64_family_type *fam)
 {
@@ -2778,6 +2753,16 @@
 	return fam_type;
 }
 
+static const struct attribute_group *amd64_edac_attr_groups[] = {
+#ifdef CONFIG_EDAC_DEBUG
+	&amd64_edac_dbg_group,
+#endif
+#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
+	&amd64_edac_inj_group,
+#endif
+	NULL
+};
+
 static int init_one_instance(struct pci_dev *F2)
 {
 	struct amd64_pvt *pvt = NULL;
@@ -2844,14 +2829,10 @@
 		mci->edac_cap = EDAC_FLAG_NONE;
 
 	ret = -ENODEV;
-	if (edac_mc_add_mc(mci)) {
+	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
 		edac_dbg(1, "failed edac_mc_add_mc()\n");
 		goto err_add_mc;
 	}
-	if (set_mc_sysfs_attrs(mci)) {
-		edac_dbg(1, "failed edac_mc_add_mc()\n");
-		goto err_add_sysfs;
-	}
 
 	/* register stuff with EDAC MCE */
 	if (report_gart_errors)
@@ -2859,14 +2840,10 @@
 
 	amd_register_ecc_decoder(decode_bus_error);
 
-	mcis[nid] = mci;
-
 	atomic_inc(&drv_instances);
 
 	return 0;
 
-err_add_sysfs:
-	edac_mc_del_mc(mci->pdev);
 err_add_mc:
 	edac_mc_free(mci);
 
@@ -2940,7 +2917,6 @@
 	mci = find_mci_by_dev(&pdev->dev);
 	WARN_ON(!mci);
 
-	del_mc_sysfs_attrs(mci);
 	/* Remove from EDAC CORE tracking list */
 	mci = edac_mc_del_mc(&pdev->dev);
 	if (!mci)
@@ -2961,7 +2937,6 @@
 
 	/* Free the EDAC CORE resources */
 	mci->pvt_info = NULL;
-	mcis[nid] = NULL;
 
 	kfree(pvt);
 	edac_mc_free(mci);
@@ -2999,7 +2974,7 @@
 	if (pci_ctl)
 		return;
 
-	mci = mcis[0];
+	mci = edac_mc_find(0);
 	if (!mci)
 		return;
 
@@ -3023,9 +2998,8 @@
 		goto err_ret;
 
 	err = -ENOMEM;
-	mcis	  = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
-	if (!(mcis && ecc_stngs))
+	if (!ecc_stngs)
 		goto err_free;
 
 	msrs = msrs_alloc();
@@ -3056,9 +3030,6 @@
 	msrs = NULL;
 
 err_free:
-	kfree(mcis);
-	mcis = NULL;
-
 	kfree(ecc_stngs);
 	ecc_stngs = NULL;
 
@@ -3076,9 +3047,6 @@
 	kfree(ecc_stngs);
 	ecc_stngs = NULL;
 
-	kfree(mcis);
-	mcis = NULL;
-
 	msrs_free(msrs);
 	msrs = NULL;
 }
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index d8468c6..4bdec75 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -453,31 +453,11 @@
 };
 
 #ifdef CONFIG_EDAC_DEBUG
-int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci);
-void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci);
-
-#else
-static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-	return 0;
-}
-static void inline amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-}
+extern const struct attribute_group amd64_edac_dbg_group;
 #endif
 
 #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
-int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci);
-void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci);
-
-#else
-static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-	return 0;
-}
-static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-}
+extern const struct attribute_group amd64_edac_inj_group;
 #endif
 
 /*
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 2c1bbf7..4709c60 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -40,34 +40,15 @@
 static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL);
 static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL);
 
-int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-	int rc;
+static struct attribute *amd64_edac_dbg_attrs[] = {
+	&dev_attr_dhar.attr,
+	&dev_attr_dbam.attr,
+	&dev_attr_topmem.attr,
+	&dev_attr_topmem2.attr,
+	&dev_attr_dram_hole.attr,
+	NULL
+};
 
-	rc = device_create_file(&mci->dev, &dev_attr_dhar);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_dbam);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_topmem);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_topmem2);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_dram_hole);
-	if (rc < 0)
-		return rc;
-
-	return 0;
-}
-
-void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-	device_remove_file(&mci->dev, &dev_attr_dhar);
-	device_remove_file(&mci->dev, &dev_attr_dbam);
-	device_remove_file(&mci->dev, &dev_attr_topmem);
-	device_remove_file(&mci->dev, &dev_attr_topmem2);
-	device_remove_file(&mci->dev, &dev_attr_dram_hole);
-}
+const struct attribute_group amd64_edac_dbg_group = {
+	.attrs = amd64_edac_dbg_attrs,
+};
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index 0d66ae6..e14977f 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -207,35 +207,28 @@
 static DEVICE_ATTR(inject_read,  S_IWUSR,
 		   NULL, amd64_inject_read_store);
 
+static struct attribute *amd64_edac_inj_attrs[] = {
+	&dev_attr_inject_section.attr,
+	&dev_attr_inject_word.attr,
+	&dev_attr_inject_ecc_vector.attr,
+	&dev_attr_inject_write.attr,
+	&dev_attr_inject_read.attr,
+	NULL
+};
 
-int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
+static umode_t amd64_edac_inj_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int idx)
 {
-	int rc;
+	struct device *dev = kobj_to_dev(kobj);
+	struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
+	struct amd64_pvt *pvt = mci->pvt_info;
 
-	rc = device_create_file(&mci->dev, &dev_attr_inject_section);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_word);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_write);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_read);
-	if (rc < 0)
-		return rc;
-
-	return 0;
+	if (pvt->fam < 0x10)
+		return 0;
+	return attr->mode;
 }
 
-void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-	device_remove_file(&mci->dev, &dev_attr_inject_section);
-	device_remove_file(&mci->dev, &dev_attr_inject_word);
-	device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector);
-	device_remove_file(&mci->dev, &dev_attr_inject_write);
-	device_remove_file(&mci->dev, &dev_attr_inject_read);
-}
+const struct attribute_group amd64_edac_inj_group = {
+	.attrs = amd64_edac_inj_attrs,
+	.is_visible = amd64_edac_inj_is_visible,
+};
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 6c9f381..ad42587 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -446,7 +446,9 @@
 				   unsigned n_layers,
 				   struct edac_mc_layer *layers,
 				   unsigned sz_pvt);
-extern int edac_mc_add_mc(struct mem_ctl_info *mci);
+extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
+				      const struct attribute_group **groups);
+#define edac_mc_add_mc(mci)	edac_mc_add_mc_with_groups(mci, NULL)
 extern void edac_mc_free(struct mem_ctl_info *mci);
 extern struct mem_ctl_info *edac_mc_find(int idx);
 extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 1747906..af3be19 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -710,9 +710,10 @@
 EXPORT_SYMBOL(edac_mc_find);
 
 /**
- * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
- *                 create sysfs entries associated with mci structure
+ * edac_mc_add_mc_with_groups: Insert the 'mci' structure into the mci
+ *	global list and create sysfs entries associated with mci structure
  * @mci: pointer to the mci structure to be added to the list
+ * @groups: optional attribute groups for the driver-specific sysfs entries
  *
  * Return:
  *	0	Success
@@ -720,7 +721,8 @@
  */
 
 /* FIXME - should a warning be printed if no error detection? correction? */
-int edac_mc_add_mc(struct mem_ctl_info *mci)
+int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
+			       const struct attribute_group **groups)
 {
 	int ret = -EINVAL;
 	edac_dbg(0, "\n");
@@ -771,7 +773,7 @@
 
 	mci->bus = &mc_bus[mci->mc_idx];
 
-	if (edac_create_sysfs_mci_device(mci)) {
+	if (edac_create_sysfs_mci_device(mci, groups)) {
 		edac_mc_printk(mci, KERN_WARNING,
 			"failed to create sysfs device\n");
 		goto fail1;
@@ -805,7 +807,7 @@
 	mutex_unlock(&mem_ctls_mutex);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(edac_mc_add_mc);
+EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
 
 /**
  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index c84eecb..112d63a 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -323,13 +323,14 @@
 	channel_dimm_label_show, channel_dimm_label_store, 5);
 
 /* Total possible dynamic DIMM Label attribute file table */
-static struct device_attribute *dynamic_csrow_dimm_attr[] = {
-	&dev_attr_legacy_ch0_dimm_label.attr,
-	&dev_attr_legacy_ch1_dimm_label.attr,
-	&dev_attr_legacy_ch2_dimm_label.attr,
-	&dev_attr_legacy_ch3_dimm_label.attr,
-	&dev_attr_legacy_ch4_dimm_label.attr,
-	&dev_attr_legacy_ch5_dimm_label.attr
+static struct attribute *dynamic_csrow_dimm_attr[] = {
+	&dev_attr_legacy_ch0_dimm_label.attr.attr,
+	&dev_attr_legacy_ch1_dimm_label.attr.attr,
+	&dev_attr_legacy_ch2_dimm_label.attr.attr,
+	&dev_attr_legacy_ch3_dimm_label.attr.attr,
+	&dev_attr_legacy_ch4_dimm_label.attr.attr,
+	&dev_attr_legacy_ch5_dimm_label.attr.attr,
+	NULL
 };
 
 /* possible dynamic channel ce_count attribute files */
@@ -347,13 +348,45 @@
 		   channel_ce_count_show, NULL, 5);
 
 /* Total possible dynamic ce_count attribute file table */
-static struct device_attribute *dynamic_csrow_ce_count_attr[] = {
-	&dev_attr_legacy_ch0_ce_count.attr,
-	&dev_attr_legacy_ch1_ce_count.attr,
-	&dev_attr_legacy_ch2_ce_count.attr,
-	&dev_attr_legacy_ch3_ce_count.attr,
-	&dev_attr_legacy_ch4_ce_count.attr,
-	&dev_attr_legacy_ch5_ce_count.attr
+static struct attribute *dynamic_csrow_ce_count_attr[] = {
+	&dev_attr_legacy_ch0_ce_count.attr.attr,
+	&dev_attr_legacy_ch1_ce_count.attr.attr,
+	&dev_attr_legacy_ch2_ce_count.attr.attr,
+	&dev_attr_legacy_ch3_ce_count.attr.attr,
+	&dev_attr_legacy_ch4_ce_count.attr.attr,
+	&dev_attr_legacy_ch5_ce_count.attr.attr,
+	NULL
+};
+
+static umode_t csrow_dev_is_visible(struct kobject *kobj,
+				    struct attribute *attr, int idx)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
+
+	if (idx >= csrow->nr_channels)
+		return 0;
+	/* Only expose populated DIMMs */
+	if (!csrow->channels[idx]->dimm->nr_pages)
+		return 0;
+	return attr->mode;
+}
+
+
+static const struct attribute_group csrow_dev_dimm_group = {
+	.attrs = dynamic_csrow_dimm_attr,
+	.is_visible = csrow_dev_is_visible,
+};
+
+static const struct attribute_group csrow_dev_ce_count_group = {
+	.attrs = dynamic_csrow_ce_count_attr,
+	.is_visible = csrow_dev_is_visible,
+};
+
+static const struct attribute_group *csrow_dev_groups[] = {
+	&csrow_dev_dimm_group,
+	&csrow_dev_ce_count_group,
+	NULL
 };
 
 static inline int nr_pages_per_csrow(struct csrow_info *csrow)
@@ -370,13 +403,12 @@
 static int edac_create_csrow_object(struct mem_ctl_info *mci,
 				    struct csrow_info *csrow, int index)
 {
-	int err, chan;
-
 	if (csrow->nr_channels > EDAC_NR_CHANNELS)
 		return -ENODEV;
 
 	csrow->dev.type = &csrow_attr_type;
 	csrow->dev.bus = mci->bus;
+	csrow->dev.groups = csrow_dev_groups;
 	device_initialize(&csrow->dev);
 	csrow->dev.parent = &mci->dev;
 	csrow->mci = mci;
@@ -386,45 +418,13 @@
 	edac_dbg(0, "creating (virtual) csrow node %s\n",
 		 dev_name(&csrow->dev));
 
-	err = device_add(&csrow->dev);
-	if (err < 0)
-		return err;
-
-	for (chan = 0; chan < csrow->nr_channels; chan++) {
-		/* Only expose populated DIMMs */
-		if (!csrow->channels[chan]->dimm->nr_pages)
-			continue;
-		err = device_create_file(&csrow->dev,
-					 dynamic_csrow_dimm_attr[chan]);
-		if (err < 0)
-			goto error;
-		err = device_create_file(&csrow->dev,
-					 dynamic_csrow_ce_count_attr[chan]);
-		if (err < 0) {
-			device_remove_file(&csrow->dev,
-					   dynamic_csrow_dimm_attr[chan]);
-			goto error;
-		}
-	}
-
-	return 0;
-
-error:
-	for (--chan; chan >= 0; chan--) {
-		device_remove_file(&csrow->dev,
-					dynamic_csrow_dimm_attr[chan]);
-		device_remove_file(&csrow->dev,
-					   dynamic_csrow_ce_count_attr[chan]);
-	}
-	put_device(&csrow->dev);
-
-	return err;
+	return device_add(&csrow->dev);
 }
 
 /* Create a CSROW object under specifed edac_mc_device */
 static int edac_create_csrow_objects(struct mem_ctl_info *mci)
 {
-	int err, i, chan;
+	int err, i;
 	struct csrow_info *csrow;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
@@ -446,14 +446,6 @@
 		csrow = mci->csrows[i];
 		if (!nr_pages_per_csrow(csrow))
 			continue;
-		for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
-			if (!csrow->channels[chan]->dimm->nr_pages)
-				continue;
-			device_remove_file(&csrow->dev,
-						dynamic_csrow_dimm_attr[chan]);
-			device_remove_file(&csrow->dev,
-						dynamic_csrow_ce_count_attr[chan]);
-		}
 		put_device(&mci->csrows[i]->dev);
 	}
 
@@ -462,23 +454,13 @@
 
 static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
 {
-	int i, chan;
+	int i;
 	struct csrow_info *csrow;
 
 	for (i = mci->nr_csrows - 1; i >= 0; i--) {
 		csrow = mci->csrows[i];
 		if (!nr_pages_per_csrow(csrow))
 			continue;
-		for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
-			if (!csrow->channels[chan]->dimm->nr_pages)
-				continue;
-			edac_dbg(1, "Removing csrow %d channel %d sysfs nodes\n",
-				 i, chan);
-			device_remove_file(&csrow->dev,
-						dynamic_csrow_dimm_attr[chan]);
-			device_remove_file(&csrow->dev,
-						dynamic_csrow_ce_count_attr[chan]);
-		}
 		device_unregister(&mci->csrows[i]->dev);
 	}
 }
@@ -863,7 +845,8 @@
 static DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
 
 /* memory scrubber attribute file */
-static DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
+DEVICE_ATTR(sdram_scrub_rate, 0, mci_sdram_scrub_rate_show,
+	    mci_sdram_scrub_rate_store); /* umode set later in is_visible */
 
 static struct attribute *mci_attrs[] = {
 	&dev_attr_reset_counters.attr,
@@ -875,11 +858,29 @@
 	&dev_attr_ue_count.attr,
 	&dev_attr_ce_count.attr,
 	&dev_attr_max_location.attr,
+	&dev_attr_sdram_scrub_rate.attr,
 	NULL
 };
 
+static umode_t mci_attr_is_visible(struct kobject *kobj,
+				   struct attribute *attr, int idx)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct mem_ctl_info *mci = to_mci(dev);
+	umode_t mode = 0;
+
+	if (attr != &dev_attr_sdram_scrub_rate.attr)
+		return attr->mode;
+	if (mci->get_sdram_scrub_rate)
+		mode |= S_IRUGO;
+	if (mci->set_sdram_scrub_rate)
+		mode |= S_IWUSR;
+	return mode;
+}
+
 static struct attribute_group mci_attr_grp = {
 	.attrs	= mci_attrs,
+	.is_visible = mci_attr_is_visible,
 };
 
 static const struct attribute_group *mci_attr_groups[] = {
@@ -913,7 +914,7 @@
 	return 0;
 }
 
-void __exit edac_debugfs_exit(void)
+void edac_debugfs_exit(void)
 {
 	debugfs_remove(edac_debugfs);
 }
@@ -973,7 +974,8 @@
  *	0	Success
  *	!0	Failure
  */
-int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
+int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
+				 const struct attribute_group **groups)
 {
 	int i, err;
 
@@ -997,6 +999,7 @@
 
 	mci->dev.parent = mci_pdev;
 	mci->dev.bus = mci->bus;
+	mci->dev.groups = groups;
 	dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
 	dev_set_drvdata(&mci->dev, mci);
 	pm_runtime_forbid(&mci->dev);
@@ -1008,23 +1011,6 @@
 		goto fail_unregister_bus;
 	}
 
-	if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
-		if (mci->get_sdram_scrub_rate) {
-			dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
-			dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
-		}
-
-		if (mci->set_sdram_scrub_rate) {
-			dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
-			dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
-		}
-
-		err = device_create_file(&mci->dev, &dev_attr_sdram_scrub_rate);
-		if (err) {
-			edac_dbg(1, "failure: create sdram_scrub_rate\n");
-			goto fail_unregister_dev;
-		}
-	}
 	/*
 	 * Create the dimm/rank devices
 	 */
@@ -1071,7 +1057,6 @@
 
 		device_unregister(&dimm->dev);
 	}
-fail_unregister_dev:
 	device_unregister(&mci->dev);
 fail_unregister_bus:
 	bus_unregister(mci->bus);
@@ -1170,7 +1155,7 @@
 	return err;
 }
 
-void __exit edac_mc_sysfs_exit(void)
+void edac_mc_sysfs_exit(void)
 {
 	device_unregister(mci_pdev);
 	edac_put_sysfs_subsys();
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index e6d1691..9cb082a 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -112,20 +112,23 @@
 
 	err = edac_mc_sysfs_init();
 	if (err)
-		goto error;
+		goto err_sysfs;
 
 	edac_debugfs_init();
 
-	/* Setup/Initialize the workq for this core */
 	err = edac_workqueue_setup();
 	if (err) {
-		edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n");
-		goto error;
+		edac_printk(KERN_ERR, EDAC_MC, "Failure initializing workqueue\n");
+		goto err_wq;
 	}
 
 	return 0;
 
-error:
+err_wq:
+	edac_debugfs_exit();
+	edac_mc_sysfs_exit();
+
+err_sysfs:
 	return err;
 }
 
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index f2118bf..26ecc52 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -22,7 +22,8 @@
 	/* on edac_mc_sysfs.c */
 int edac_mc_sysfs_init(void);
 void edac_mc_sysfs_exit(void);
-extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci);
+extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
+					const struct attribute_group **groups);
 extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
 void edac_unregister_sysfs(struct mem_ctl_info *mci);
 extern int edac_get_log_ue(void);
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
index f784de1..11260cc 100644
--- a/drivers/edac/highbank_mc_edac.c
+++ b/drivers/edac/highbank_mc_edac.c
@@ -124,6 +124,13 @@
 
 static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl);
 
+static struct attribute *highbank_dev_attrs[] = {
+	&dev_attr_inject_ctrl.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(highbank_dev);
+
 struct hb_mc_settings {
 	int	err_offset;
 	int	int_offset;
@@ -139,7 +146,7 @@
 	.int_offset = MW_DDR_ECC_INT_BASE,
 };
 
-static struct of_device_id hb_ddr_ctrl_of_match[] = {
+static const struct of_device_id hb_ddr_ctrl_of_match[] = {
 	{ .compatible = "calxeda,hb-ddr-ctrl",		.data = &hb_settings },
 	{ .compatible = "calxeda,ecx-2000-ddr-ctrl",	.data = &mw_settings },
 	{},
@@ -231,7 +238,7 @@
 	dimm->mtype = MEM_DDR3;
 	dimm->edac_mode = EDAC_SECDED;
 
-	res = edac_mc_add_mc(mci);
+	res = edac_mc_add_mc_with_groups(mci, highbank_dev_groups);
 	if (res < 0)
 		goto err;
 
@@ -243,8 +250,6 @@
 		goto err2;
 	}
 
-	device_create_file(&mci->dev, &dev_attr_inject_ctrl);
-
 	devres_close_group(&pdev->dev, NULL);
 	return 0;
 err2:
@@ -259,7 +264,6 @@
 {
 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 
-	device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
 	edac_mc_del_mc(&pdev->dev);
 	edac_mc_free(mci);
 	return 0;
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 9cd0b30..01087a3 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1157,27 +1157,24 @@
 static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
 		   i7core_inject_enable_show, i7core_inject_enable_store);
 
+static struct attribute *i7core_dev_attrs[] = {
+	&dev_attr_inject_section.attr,
+	&dev_attr_inject_type.attr,
+	&dev_attr_inject_eccmask.attr,
+	&dev_attr_inject_enable.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(i7core_dev);
+
 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
 {
 	struct i7core_pvt *pvt = mci->pvt_info;
 	int rc;
 
-	rc = device_create_file(&mci->dev, &dev_attr_inject_section);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_type);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
-	if (rc < 0)
-		return rc;
-
 	pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
 	if (!pvt->addrmatch_dev)
-		return rc;
+		return -ENOMEM;
 
 	pvt->addrmatch_dev->type = &addrmatch_type;
 	pvt->addrmatch_dev->bus = mci->dev.bus;
@@ -1198,7 +1195,7 @@
 		if (!pvt->chancounts_dev) {
 			put_device(pvt->addrmatch_dev);
 			device_del(pvt->addrmatch_dev);
-			return rc;
+			return -ENOMEM;
 		}
 
 		pvt->chancounts_dev->type = &all_channel_counts_type;
@@ -1223,11 +1220,6 @@
 
 	edac_dbg(1, "\n");
 
-	device_remove_file(&mci->dev, &dev_attr_inject_section);
-	device_remove_file(&mci->dev, &dev_attr_inject_type);
-	device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
-	device_remove_file(&mci->dev, &dev_attr_inject_enable);
-
 	if (!pvt->is_registered) {
 		put_device(pvt->chancounts_dev);
 		device_del(pvt->chancounts_dev);
@@ -2259,7 +2251,7 @@
 		enable_sdram_scrub_setting(mci);
 
 	/* add this new MC control structure to EDAC's list of MCs */
-	if (unlikely(edac_mc_add_mc(mci))) {
+	if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
 		/* FIXME: perhaps some code should go here that disables error
 		 * reporting if we just enabled it
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index b4705d9..4d411036 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -350,8 +350,6 @@
 	return -ENODEV;
 }
 
-EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1);
-
 /* returns count (>= 0), or negative on error */
 static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
 				      const struct pci_device_id *ent)
@@ -384,8 +382,6 @@
 	edac_mc_free(mci);
 }
 
-EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one);
-
 static const struct pci_device_id i82443bxgx_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)},
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)},
@@ -445,9 +441,7 @@
 	pci_unregister_driver(&i82443bxgx_edacmc_driver);
 
 fail0:
-	if (mci_pdev != NULL)
-		pci_dev_put(mci_pdev);
-
+	pci_dev_put(mci_pdev);
 	return pci_rc;
 }
 
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index 4382343..ee1078c 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -343,20 +343,15 @@
 	pci_unregister_driver(&i82860_driver);
 
 fail0:
-	if (mci_pdev != NULL)
-		pci_dev_put(mci_pdev);
-
+	pci_dev_put(mci_pdev);
 	return pci_rc;
 }
 
 static void __exit i82860_exit(void)
 {
 	edac_dbg(3, "\n");
-
 	pci_unregister_driver(&i82860_driver);
-
-	if (mci_pdev != NULL)
-		pci_dev_put(mci_pdev);
+	pci_dev_put(mci_pdev);
 }
 
 module_init(i82860_init);
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index 64b6832..c26a513 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -576,9 +576,7 @@
 	pci_unregister_driver(&i82875p_driver);
 
 fail0:
-	if (mci_pdev != NULL)
-		pci_dev_put(mci_pdev);
-
+	pci_dev_put(mci_pdev);
 	return pci_rc;
 }
 
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 10b1052..35ab66c 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -685,9 +685,7 @@
 	pci_unregister_driver(&i82975x_driver);
 
 fail0:
-	if (mci_pdev != NULL)
-		pci_dev_put(mci_pdev);
-
+	pci_dev_put(mci_pdev);
 	return pci_rc;
 }
 
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 1fa76a5..68bf234 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -134,29 +134,14 @@
 DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR,
 	    mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store);
 
-static int mpc85xx_create_sysfs_attributes(struct mem_ctl_info *mci)
-{
-	int rc;
+static struct attribute *mpc85xx_dev_attrs[] = {
+	&dev_attr_inject_data_hi.attr,
+	&dev_attr_inject_data_lo.attr,
+	&dev_attr_inject_ctrl.attr,
+	NULL
+};
 
-	rc = device_create_file(&mci->dev, &dev_attr_inject_data_hi);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_data_lo);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_inject_ctrl);
-	if (rc < 0)
-		return rc;
-
-	return 0;
-}
-
-static void mpc85xx_remove_sysfs_attributes(struct mem_ctl_info *mci)
-{
-	device_remove_file(&mci->dev, &dev_attr_inject_data_hi);
-	device_remove_file(&mci->dev, &dev_attr_inject_data_lo);
-	device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
-}
+ATTRIBUTE_GROUPS(mpc85xx_dev);
 
 /**************************** PCI Err device ***************************/
 #ifdef CONFIG_PCI
@@ -685,7 +670,7 @@
 	return 0;
 }
 
-static struct of_device_id mpc85xx_l2_err_of_match[] = {
+static const struct of_device_id mpc85xx_l2_err_of_match[] = {
 /* deprecate the fsl,85.. forms in the future, 2.6.30? */
 	{ .compatible = "fsl,8540-l2-cache-controller", },
 	{ .compatible = "fsl,8541-l2-cache-controller", },
@@ -1106,13 +1091,7 @@
 	/* clear all error bits */
 	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0);
 
-	if (edac_mc_add_mc(mci)) {
-		edac_dbg(3, "failed edac_mc_add_mc()\n");
-		goto err;
-	}
-
-	if (mpc85xx_create_sysfs_attributes(mci)) {
-		edac_mc_del_mc(mci->pdev);
+	if (edac_mc_add_mc_with_groups(mci, mpc85xx_dev_groups)) {
 		edac_dbg(3, "failed edac_mc_add_mc()\n");
 		goto err;
 	}
@@ -1176,13 +1155,12 @@
 		 orig_ddr_err_disable);
 	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe);
 
-	mpc85xx_remove_sysfs_attributes(mci);
 	edac_mc_del_mc(&op->dev);
 	edac_mc_free(mci);
 	return 0;
 }
 
-static struct of_device_id mpc85xx_mc_err_of_match[] = {
+static const struct of_device_id mpc85xx_mc_err_of_match[] = {
 /* deprecate the fsl,85.. forms in the future, 2.6.30? */
 	{ .compatible = "fsl,8540-memory-controller", },
 	{ .compatible = "fsl,8541-memory-controller", },
diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c
index 4bd10f9..bb19e07 100644
--- a/drivers/edac/octeon_edac-lmc.c
+++ b/drivers/edac/octeon_edac-lmc.c
@@ -209,35 +209,18 @@
 static DEVICE_ATTR(col, S_IRUGO | S_IWUSR,
 		   octeon_mc_inject_col_show, octeon_mc_inject_col_store);
 
+static struct attribute *octeon_dev_attrs[] = {
+	&dev_attr_inject.attr,
+	&dev_attr_error_type.attr,
+	&dev_attr_dimm.attr,
+	&dev_attr_rank.attr,
+	&dev_attr_bank.attr,
+	&dev_attr_row.attr,
+	&dev_attr_col.attr,
+	NULL
+};
 
-static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
-{
-	int rc;
-
-	rc = device_create_file(&mci->dev, &dev_attr_inject);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_error_type);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_dimm);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_rank);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_bank);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_row);
-	if (rc < 0)
-		return rc;
-	rc = device_create_file(&mci->dev, &dev_attr_col);
-	if (rc < 0)
-		return rc;
-
-	return 0;
-}
+ATTRIBUTE_GROUPS(octeon_dev);
 
 static int octeon_lmc_edac_probe(struct platform_device *pdev)
 {
@@ -271,18 +254,12 @@
 		mci->ctl_name = "octeon-lmc-err";
 		mci->edac_check = octeon_lmc_edac_poll;
 
-		if (edac_mc_add_mc(mci)) {
+		if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) {
 			dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
 			edac_mc_free(mci);
 			return -ENXIO;
 		}
 
-		if (octeon_set_mc_sysfs_attributes(mci)) {
-			dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
-			return -ENXIO;
-		}
-
-
 		cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
 		cfg0.s.intr_ded_ena = 0;	/* We poll */
 		cfg0.s.intr_sec_ena = 0;
@@ -309,18 +286,12 @@
 		mci->ctl_name = "co_lmc_err";
 		mci->edac_check = octeon_lmc_edac_poll_o2;
 
-		if (edac_mc_add_mc(mci)) {
+		if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) {
 			dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
 			edac_mc_free(mci);
 			return -ENXIO;
 		}
 
-		if (octeon_set_mc_sysfs_attributes(mci)) {
-			dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
-			return -ENXIO;
-		}
-
-
 		en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
 		en.s.intr_ded_ena = 0;	/* We poll */
 		en.s.intr_sec_ena = 0;
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index 1b64fd0..3515b38 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -193,7 +193,7 @@
  * Device tree node type and compatible tuples this driver can match
  * on.
  */
-static struct of_device_id ppc4xx_edac_match[] = {
+static const struct of_device_id ppc4xx_edac_match[] = {
 	{
 		.compatible	= "ibm,sdram-4xx-ddr2"
 	},
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 1c96915..fc153ae 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -512,7 +512,7 @@
 	return 0;
 }
 
-static struct of_device_id synps_edac_match[] = {
+static const struct of_device_id synps_edac_match[] = {
 	{ .compatible = "xlnx,zynq-ddrc-a05", },
 	{ /* end of table */ }
 };
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 2eebd28b..6e45a43 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -17,7 +17,9 @@
  */
 static const char dmi_empty_string[] = "        ";
 
-static u16 __initdata dmi_ver;
+static u32 dmi_ver __initdata;
+static u32 dmi_len;
+static u16 dmi_num;
 /*
  * Catch too early calls to dmi_check_system():
  */
@@ -78,7 +80,7 @@
  *	We have to be cautious here. We have seen BIOSes with DMI pointers
  *	pointing to completely the wrong place for example
  */
-static void dmi_table(u8 *buf, u32 len, int num,
+static void dmi_table(u8 *buf,
 		      void (*decode)(const struct dmi_header *, void *),
 		      void *private_data)
 {
@@ -91,8 +93,8 @@
 	 * off the end of the table (should never happen but sometimes does
 	 * on bogus implementations.)
 	 */
-	while ((!num || i < num) &&
-	       (data - buf + sizeof(struct dmi_header)) <= len) {
+	while ((!dmi_num || i < dmi_num) &&
+	       (data - buf + sizeof(struct dmi_header)) <= dmi_len) {
 		const struct dmi_header *dm = (const struct dmi_header *)data;
 
 		/*
@@ -101,9 +103,9 @@
 		 *  table in dmi_decode or dmi_string
 		 */
 		data += dm->length;
-		while ((data - buf < len - 1) && (data[0] || data[1]))
+		while ((data - buf < dmi_len - 1) && (data[0] || data[1]))
 			data++;
-		if (data - buf < len - 1)
+		if (data - buf < dmi_len - 1)
 			decode(dm, private_data);
 
 		/*
@@ -118,8 +120,6 @@
 }
 
 static phys_addr_t dmi_base;
-static u32 dmi_len;
-static u16 dmi_num;
 
 static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 		void *))
@@ -130,7 +130,7 @@
 	if (buf == NULL)
 		return -1;
 
-	dmi_table(buf, dmi_len, dmi_num, decode, NULL);
+	dmi_table(buf, decode, NULL);
 
 	add_device_randomness(buf, dmi_len);
 
@@ -201,7 +201,7 @@
 	 * the UUID are supposed to be little-endian encoded.  The specification
 	 * says that this is the defacto standard.
 	 */
-	if (dmi_ver >= 0x0206)
+	if (dmi_ver >= 0x020600)
 		sprintf(s, "%pUL", d);
 	else
 		sprintf(s, "%pUB", d);
@@ -473,7 +473,7 @@
  */
 static int __init dmi_present(const u8 *buf)
 {
-	int smbios_ver;
+	u32 smbios_ver;
 
 	if (memcmp(buf, "_SM_", 4) == 0 &&
 	    buf[5] < 32 && dmi_checksum(buf, buf[5])) {
@@ -506,14 +506,16 @@
 		if (dmi_walk_early(dmi_decode) == 0) {
 			if (smbios_ver) {
 				dmi_ver = smbios_ver;
-				pr_info("SMBIOS %d.%d present.\n",
-				       dmi_ver >> 8, dmi_ver & 0xFF);
+				pr_info("SMBIOS %d.%d%s present.\n",
+					dmi_ver >> 8, dmi_ver & 0xFF,
+					(dmi_ver < 0x0300) ? "" : ".x");
 			} else {
 				dmi_ver = (buf[14] & 0xF0) << 4 |
 					   (buf[14] & 0x0F);
 				pr_info("Legacy DMI %d.%d present.\n",
 				       dmi_ver >> 8, dmi_ver & 0xFF);
 			}
+			dmi_ver <<= 8;
 			dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
 			printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
 			return 0;
@@ -531,14 +533,16 @@
 {
 	if (memcmp(buf, "_SM3_", 5) == 0 &&
 	    buf[6] < 32 && dmi_checksum(buf, buf[6])) {
-		dmi_ver = get_unaligned_be16(buf + 7);
+		dmi_ver = get_unaligned_be32(buf + 6);
+		dmi_ver &= 0xFFFFFF;
 		dmi_num = 0;			/* No longer specified */
 		dmi_len = get_unaligned_le32(buf + 12);
 		dmi_base = get_unaligned_le64(buf + 16);
 
 		if (dmi_walk_early(dmi_decode) == 0) {
-			pr_info("SMBIOS %d.%d present.\n",
-				dmi_ver >> 8, dmi_ver & 0xFF);
+			pr_info("SMBIOS %d.%d.%d present.\n",
+				dmi_ver >> 16, (dmi_ver >> 8) & 0xFF,
+				dmi_ver & 0xFF);
 			dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
 			pr_debug("DMI: %s\n", dmi_ids_string);
 			return 0;
@@ -893,7 +897,7 @@
 	if (buf == NULL)
 		return -1;
 
-	dmi_table(buf, dmi_len, dmi_num, decode, private_data);
+	dmi_table(buf, decode, private_data);
 
 	dmi_unmap(buf);
 	return 0;
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index dcae482..e29560e 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -175,7 +175,7 @@
 	unsigned long initrd_addr;
 	u64 initrd_size = 0;
 	unsigned long fdt_addr = 0;  /* Original DTB */
-	u64 fdt_size = 0;  /* We don't get size from configuration table */
+	unsigned long fdt_size = 0;
 	char *cmdline_ptr = NULL;
 	int cmdline_size = 0;
 	unsigned long new_fdt_addr;
@@ -239,8 +239,7 @@
 	} else {
 		status = handle_cmdline_files(sys_table, image, cmdline_ptr,
 					      "dtb=",
-					      ~0UL, (unsigned long *)&fdt_addr,
-					      (unsigned long *)&fdt_size);
+					      ~0UL, &fdt_addr, &fdt_size);
 
 		if (status != EFI_SUCCESS) {
 			pr_efi_err(sys_table, "Failed to load device tree!\n");
@@ -252,7 +251,7 @@
 		pr_efi(sys_table, "Using DTB from command line\n");
 	} else {
 		/* Look for a device tree configuration table entry. */
-		fdt_addr = (uintptr_t)get_fdt(sys_table);
+		fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size);
 		if (fdt_addr)
 			pr_efi(sys_table, "Using DTB from configuration table\n");
 	}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 47437b1..e334a01 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -41,7 +41,7 @@
 					    unsigned long fdt_addr,
 					    unsigned long fdt_size);
 
-void *get_fdt(efi_system_table_t *sys_table);
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size);
 
 void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 		     unsigned long desc_size, efi_memory_desc_t *runtime_map,
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 91da56c..ef5d764 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -323,7 +323,7 @@
 	return EFI_LOAD_ERROR;
 }
 
-void *get_fdt(efi_system_table_t *sys_table)
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size)
 {
 	efi_guid_t fdt_guid = DEVICE_TREE_GUID;
 	efi_config_table_t *tables;
@@ -336,6 +336,11 @@
 	for (i = 0; i < sys_table->nr_tables; i++)
 		if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) {
 			fdt = (void *) tables[i].table;
+			if (fdt_check_header(fdt) != 0) {
+				pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+				return NULL;
+			}
+			*fdt_size = fdt_totalsize(fdt);
 			break;
 	 }
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 679b10e..b6f076b2 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -2121,7 +2121,7 @@
 	connector = drm_connector_find(dev, out_resp->connector_id);
 	if (!connector) {
 		ret = -ENOENT;
-		goto out;
+		goto out_unlock;
 	}
 
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
@@ -2201,6 +2201,8 @@
 
 out:
 	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out_unlock:
 	mutex_unlock(&dev->mode_config.mutex);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cc6ea53..5c66b56 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1095,6 +1095,7 @@
 	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
 	s->gu_ctl0		= I915_READ(VLV_GU_CTL0);
 	s->gu_ctl1		= I915_READ(VLV_GU_CTL1);
+	s->pcbr			= I915_READ(VLV_PCBR);
 	s->clock_gate_dis2	= I915_READ(VLV_GUNIT_CLOCK_GATE2);
 
 	/*
@@ -1189,6 +1190,7 @@
 	/* Gunit-Display CZ domain, 0x182028-0x1821CF */
 	I915_WRITE(VLV_GU_CTL0,			s->gu_ctl0);
 	I915_WRITE(VLV_GU_CTL1,			s->gu_ctl1);
+	I915_WRITE(VLV_PCBR,			s->pcbr);
 	I915_WRITE(VLV_GUNIT_CLOCK_GATE2,	s->clock_gate_dis2);
 }
 
@@ -1197,19 +1199,7 @@
 	u32 val;
 	int err;
 
-	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
-	WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
-
 #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
-	/* Wait for a previous force-off to settle */
-	if (force_on) {
-		err = wait_for(!COND, 20);
-		if (err) {
-			DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
-				  I915_READ(VLV_GTLC_SURVIVABILITY_REG));
-			return err;
-		}
-	}
 
 	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
 	val &= ~VLV_GFX_CLK_FORCE_ON_BIT;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8727086..b4faa2d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1094,6 +1094,7 @@
 	/* Display 2 CZ domain */
 	u32 gu_ctl0;
 	u32 gu_ctl1;
+	u32 pcbr;
 	u32 clock_gate_dis2;
 };
 
diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c
index 3e24621..0ec5abd 100644
--- a/drivers/gpu/drm/msm/edp/edp_ctrl.c
+++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c
@@ -332,7 +332,7 @@
 		goto vdda_set_fail;
 	}
 
-	ret = regulator_set_optimum_mode(ctrl->vdda_vreg, VDDA_UA_ON_LOAD);
+	ret = regulator_set_load(ctrl->vdda_vreg, VDDA_UA_ON_LOAD);
 	if (ret < 0) {
 		pr_err("%s: vdda_vreg set regulator mode failed.\n", __func__);
 		goto vdda_set_fail;
@@ -356,7 +356,7 @@
 lvl_enable_fail:
 	regulator_disable(ctrl->vdda_vreg);
 vdda_enable_fail:
-	regulator_set_optimum_mode(ctrl->vdda_vreg, VDDA_UA_OFF_LOAD);
+	regulator_set_load(ctrl->vdda_vreg, VDDA_UA_OFF_LOAD);
 vdda_set_fail:
 	return ret;
 }
@@ -365,7 +365,7 @@
 {
 	regulator_disable(ctrl->lvl_vreg);
 	regulator_disable(ctrl->vdda_vreg);
-	regulator_set_optimum_mode(ctrl->vdda_vreg, VDDA_UA_OFF_LOAD);
+	regulator_set_load(ctrl->vdda_vreg, VDDA_UA_OFF_LOAD);
 }
 
 static int edp_gpio_config(struct edp_ctrl *ctrl)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 052869d..32c2da4 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -339,7 +339,7 @@
 					 enum power_supply_property prop,
 					 union power_supply_propval *val)
 {
-	struct hid_device *dev = container_of(psy, struct hid_device, battery);
+	struct hid_device *dev = power_supply_get_drvdata(psy);
 	int ret = 0;
 	__u8 *buf;
 
@@ -397,26 +397,32 @@
 
 static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field)
 {
-	struct power_supply *battery = &dev->battery;
-	int ret;
+	struct power_supply_desc *psy_desc = NULL;
+	struct power_supply_config psy_cfg = { .drv_data = dev, };
 	unsigned quirks;
 	s32 min, max;
 
 	if (field->usage->hid != HID_DC_BATTERYSTRENGTH)
 		return false;	/* no match */
 
-	if (battery->name != NULL)
+	if (dev->battery != NULL)
 		goto out;	/* already initialized? */
 
-	battery->name = kasprintf(GFP_KERNEL, "hid-%s-battery", dev->uniq);
-	if (battery->name == NULL)
+	psy_desc = kzalloc(sizeof(*psy_desc), GFP_KERNEL);
+	if (psy_desc == NULL)
 		goto out;
 
-	battery->type = POWER_SUPPLY_TYPE_BATTERY;
-	battery->properties = hidinput_battery_props;
-	battery->num_properties = ARRAY_SIZE(hidinput_battery_props);
-	battery->use_for_apm = 0;
-	battery->get_property = hidinput_get_battery_property;
+	psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", dev->uniq);
+	if (psy_desc->name == NULL) {
+		kfree(psy_desc);
+		goto out;
+	}
+
+	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+	psy_desc->properties = hidinput_battery_props;
+	psy_desc->num_properties = ARRAY_SIZE(hidinput_battery_props);
+	psy_desc->use_for_apm = 0;
+	psy_desc->get_property = hidinput_get_battery_property;
 
 	quirks = find_battery_quirk(dev);
 
@@ -439,27 +445,30 @@
 	dev->battery_report_type = report_type;
 	dev->battery_report_id = field->report->id;
 
-	ret = power_supply_register(&dev->dev, battery);
-	if (ret != 0) {
-		hid_warn(dev, "can't register power supply: %d\n", ret);
-		kfree(battery->name);
-		battery->name = NULL;
+	dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg);
+	if (IS_ERR(dev->battery)) {
+		hid_warn(dev, "can't register power supply: %ld\n",
+				PTR_ERR(dev->battery));
+		kfree(psy_desc->name);
+		kfree(psy_desc);
+		dev->battery = NULL;
+	} else {
+		power_supply_powers(dev->battery, &dev->dev);
 	}
 
-	power_supply_powers(battery, &dev->dev);
-
 out:
 	return true;
 }
 
 static void hidinput_cleanup_battery(struct hid_device *dev)
 {
-	if (!dev->battery.name)
+	if (!dev->battery)
 		return;
 
-	power_supply_unregister(&dev->battery);
-	kfree(dev->battery.name);
-	dev->battery.name = NULL;
+	power_supply_unregister(dev->battery);
+	kfree(dev->battery->desc->name);
+	kfree(dev->battery->desc);
+	dev->battery = NULL;
 }
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
 static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 1896c01..c906300 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -815,7 +815,8 @@
 	struct led_classdev *leds[MAX_LEDS];
 	unsigned long quirks;
 	struct work_struct state_worker;
-	struct power_supply battery;
+	struct power_supply *battery;
+	struct power_supply_desc battery_desc;
 	int device_id;
 	__u8 *output_report_dmabuf;
 
@@ -1660,7 +1661,7 @@
 				     enum power_supply_property psp,
 				     union power_supply_propval *val)
 {
-	struct sony_sc *sc = container_of(psy, struct sony_sc, battery);
+	struct sony_sc *sc = power_supply_get_drvdata(psy);
 	unsigned long flags;
 	int ret = 0;
 	u8 battery_charging, battery_capacity, cable_state;
@@ -1699,6 +1700,7 @@
 
 static int sony_battery_probe(struct sony_sc *sc)
 {
+	struct power_supply_config psy_cfg = { .drv_data = sc, };
 	struct hid_device *hdev = sc->hdev;
 	int ret;
 
@@ -1708,39 +1710,42 @@
 	 */
 	sc->battery_capacity = 100;
 
-	sc->battery.properties = sony_battery_props;
-	sc->battery.num_properties = ARRAY_SIZE(sony_battery_props);
-	sc->battery.get_property = sony_battery_get_property;
-	sc->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	sc->battery.use_for_apm = 0;
-	sc->battery.name = kasprintf(GFP_KERNEL, "sony_controller_battery_%pMR",
-				     sc->mac_address);
-	if (!sc->battery.name)
+	sc->battery_desc.properties = sony_battery_props;
+	sc->battery_desc.num_properties = ARRAY_SIZE(sony_battery_props);
+	sc->battery_desc.get_property = sony_battery_get_property;
+	sc->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	sc->battery_desc.use_for_apm = 0;
+	sc->battery_desc.name = kasprintf(GFP_KERNEL,
+					  "sony_controller_battery_%pMR",
+					  sc->mac_address);
+	if (!sc->battery_desc.name)
 		return -ENOMEM;
 
-	ret = power_supply_register(&hdev->dev, &sc->battery);
-	if (ret) {
+	sc->battery = power_supply_register(&hdev->dev, &sc->battery_desc,
+					    &psy_cfg);
+	if (IS_ERR(sc->battery)) {
+		ret = PTR_ERR(sc->battery);
 		hid_err(hdev, "Unable to register battery device\n");
 		goto err_free;
 	}
 
-	power_supply_powers(&sc->battery, &hdev->dev);
+	power_supply_powers(sc->battery, &hdev->dev);
 	return 0;
 
 err_free:
-	kfree(sc->battery.name);
-	sc->battery.name = NULL;
+	kfree(sc->battery_desc.name);
+	sc->battery_desc.name = NULL;
 	return ret;
 }
 
 static void sony_battery_remove(struct sony_sc *sc)
 {
-	if (!sc->battery.name)
+	if (!sc->battery_desc.name)
 		return;
 
-	power_supply_unregister(&sc->battery);
-	kfree(sc->battery.name);
-	sc->battery.name = NULL;
+	power_supply_unregister(sc->battery);
+	kfree(sc->battery_desc.name);
+	sc->battery_desc.name = NULL;
 }
 
 /*
diff --git a/drivers/hid/hid-wiimote-modules.c b/drivers/hid/hid-wiimote-modules.c
index 6b61f01..05e23c4 100644
--- a/drivers/hid/hid-wiimote-modules.c
+++ b/drivers/hid/hid-wiimote-modules.c
@@ -203,8 +203,7 @@
 				       enum power_supply_property psp,
 				       union power_supply_propval *val)
 {
-	struct wiimote_data *wdata = container_of(psy, struct wiimote_data,
-						  battery);
+	struct wiimote_data *wdata = power_supply_get_drvdata(psy);
 	int ret = 0, state;
 	unsigned long flags;
 
@@ -238,42 +237,46 @@
 static int wiimod_battery_probe(const struct wiimod_ops *ops,
 				struct wiimote_data *wdata)
 {
+	struct power_supply_config psy_cfg = { .drv_data = wdata, };
 	int ret;
 
-	wdata->battery.properties = wiimod_battery_props;
-	wdata->battery.num_properties = ARRAY_SIZE(wiimod_battery_props);
-	wdata->battery.get_property = wiimod_battery_get_property;
-	wdata->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	wdata->battery.use_for_apm = 0;
-	wdata->battery.name = kasprintf(GFP_KERNEL, "wiimote_battery_%s",
-					wdata->hdev->uniq);
-	if (!wdata->battery.name)
+	wdata->battery_desc.properties = wiimod_battery_props;
+	wdata->battery_desc.num_properties = ARRAY_SIZE(wiimod_battery_props);
+	wdata->battery_desc.get_property = wiimod_battery_get_property;
+	wdata->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	wdata->battery_desc.use_for_apm = 0;
+	wdata->battery_desc.name = kasprintf(GFP_KERNEL, "wiimote_battery_%s",
+					     wdata->hdev->uniq);
+	if (!wdata->battery_desc.name)
 		return -ENOMEM;
 
-	ret = power_supply_register(&wdata->hdev->dev, &wdata->battery);
-	if (ret) {
+	wdata->battery = power_supply_register(&wdata->hdev->dev,
+					       &wdata->battery_desc,
+					       &psy_cfg);
+	if (IS_ERR(wdata->battery)) {
 		hid_err(wdata->hdev, "cannot register battery device\n");
+		ret = PTR_ERR(wdata->battery);
 		goto err_free;
 	}
 
-	power_supply_powers(&wdata->battery, &wdata->hdev->dev);
+	power_supply_powers(wdata->battery, &wdata->hdev->dev);
 	return 0;
 
 err_free:
-	kfree(wdata->battery.name);
-	wdata->battery.name = NULL;
+	kfree(wdata->battery_desc.name);
+	wdata->battery_desc.name = NULL;
 	return ret;
 }
 
 static void wiimod_battery_remove(const struct wiimod_ops *ops,
 				  struct wiimote_data *wdata)
 {
-	if (!wdata->battery.name)
+	if (!wdata->battery_desc.name)
 		return;
 
-	power_supply_unregister(&wdata->battery);
-	kfree(wdata->battery.name);
-	wdata->battery.name = NULL;
+	power_supply_unregister(wdata->battery);
+	kfree(wdata->battery_desc.name);
+	wdata->battery_desc.name = NULL;
 }
 
 static const struct wiimod_ops wiimod_battery = {
diff --git a/drivers/hid/hid-wiimote.h b/drivers/hid/hid-wiimote.h
index 10934aa..875694d 100644
--- a/drivers/hid/hid-wiimote.h
+++ b/drivers/hid/hid-wiimote.h
@@ -147,7 +147,8 @@
 	struct led_classdev *leds[4];
 	struct input_dev *accel;
 	struct input_dev *ir;
-	struct power_supply battery;
+	struct power_supply *battery;
+	struct power_supply_desc battery_desc;
 	struct input_dev *mp;
 	struct timer_list timer;
 	struct wiimote_debug *debug;
diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
index 7db4328..0d0d0dd 100644
--- a/drivers/hid/wacom.h
+++ b/drivers/hid/wacom.h
@@ -119,8 +119,10 @@
 		u8 img_lum;   /* OLED matrix display brightness */
 	} led;
 	bool led_initialized;
-	struct power_supply battery;
-	struct power_supply ac;
+	struct power_supply *battery;
+	struct power_supply *ac;
+	struct power_supply_desc battery_desc;
+	struct power_supply_desc ac_desc;
 };
 
 static inline void wacom_schedule_work(struct wacom_wac *wacom_wac)
@@ -133,7 +135,7 @@
 {
 	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
 
-	power_supply_changed(&wacom->battery);
+	power_supply_changed(wacom->battery);
 }
 
 extern const struct hid_device_id wacom_ids[];
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index f0568a7..ba9af47 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -944,7 +944,7 @@
 				      enum power_supply_property psp,
 				      union power_supply_propval *val)
 {
-	struct wacom *wacom = container_of(psy, struct wacom, battery);
+	struct wacom *wacom = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
@@ -976,7 +976,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct wacom *wacom = container_of(psy, struct wacom, ac);
+	struct wacom *wacom = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
@@ -998,42 +998,46 @@
 static int wacom_initialize_battery(struct wacom *wacom)
 {
 	static atomic_t battery_no = ATOMIC_INIT(0);
-	int error;
+	struct power_supply_config psy_cfg = { .drv_data = wacom, };
 	unsigned long n;
 
 	if (wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) {
+		struct power_supply_desc *bat_desc = &wacom->battery_desc;
+		struct power_supply_desc *ac_desc = &wacom->ac_desc;
 		n = atomic_inc_return(&battery_no) - 1;
 
-		wacom->battery.properties = wacom_battery_props;
-		wacom->battery.num_properties = ARRAY_SIZE(wacom_battery_props);
-		wacom->battery.get_property = wacom_battery_get_property;
+		bat_desc->properties = wacom_battery_props;
+		bat_desc->num_properties = ARRAY_SIZE(wacom_battery_props);
+		bat_desc->get_property = wacom_battery_get_property;
 		sprintf(wacom->wacom_wac.bat_name, "wacom_battery_%ld", n);
-		wacom->battery.name = wacom->wacom_wac.bat_name;
-		wacom->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-		wacom->battery.use_for_apm = 0;
+		bat_desc->name = wacom->wacom_wac.bat_name;
+		bat_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+		bat_desc->use_for_apm = 0;
 
-		wacom->ac.properties = wacom_ac_props;
-		wacom->ac.num_properties = ARRAY_SIZE(wacom_ac_props);
-		wacom->ac.get_property = wacom_ac_get_property;
+		ac_desc->properties = wacom_ac_props;
+		ac_desc->num_properties = ARRAY_SIZE(wacom_ac_props);
+		ac_desc->get_property = wacom_ac_get_property;
 		sprintf(wacom->wacom_wac.ac_name, "wacom_ac_%ld", n);
-		wacom->ac.name = wacom->wacom_wac.ac_name;
-		wacom->ac.type = POWER_SUPPLY_TYPE_MAINS;
-		wacom->ac.use_for_apm = 0;
+		ac_desc->name = wacom->wacom_wac.ac_name;
+		ac_desc->type = POWER_SUPPLY_TYPE_MAINS;
+		ac_desc->use_for_apm = 0;
 
-		error = power_supply_register(&wacom->hdev->dev,
-					      &wacom->battery);
-		if (error)
-			return error;
+		wacom->battery = power_supply_register(&wacom->hdev->dev,
+					      &wacom->battery_desc, &psy_cfg);
+		if (IS_ERR(wacom->battery))
+			return PTR_ERR(wacom->battery);
 
-		power_supply_powers(&wacom->battery, &wacom->hdev->dev);
+		power_supply_powers(wacom->battery, &wacom->hdev->dev);
 
-		error = power_supply_register(&wacom->hdev->dev, &wacom->ac);
-		if (error) {
-			power_supply_unregister(&wacom->battery);
-			return error;
+		wacom->ac = power_supply_register(&wacom->hdev->dev,
+						  &wacom->ac_desc,
+						  &psy_cfg);
+		if (IS_ERR(wacom->ac)) {
+			power_supply_unregister(wacom->battery);
+			return PTR_ERR(wacom->ac);
 		}
 
-		power_supply_powers(&wacom->ac, &wacom->hdev->dev);
+		power_supply_powers(wacom->ac, &wacom->hdev->dev);
 	}
 
 	return 0;
@@ -1042,11 +1046,11 @@
 static void wacom_destroy_battery(struct wacom *wacom)
 {
 	if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) &&
-	     wacom->battery.dev) {
-		power_supply_unregister(&wacom->battery);
-		wacom->battery.dev = NULL;
-		power_supply_unregister(&wacom->ac);
-		wacom->ac.dev = NULL;
+	     wacom->battery) {
+		power_supply_unregister(wacom->battery);
+		wacom->battery = NULL;
+		power_supply_unregister(wacom->ac);
+		wacom->ac = NULL;
 	}
 }
 
diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig
index bc60dec..d612620 100644
--- a/drivers/hsi/clients/Kconfig
+++ b/drivers/hsi/clients/Kconfig
@@ -6,13 +6,23 @@
 
 config NOKIA_MODEM
 	tristate "Nokia Modem"
-	depends on HSI && SSI_PROTOCOL
+	depends on HSI && SSI_PROTOCOL && CMT_SPEECH
 	help
 	Say Y here if you want to add support for the modem on Nokia
 	N900 (Nokia RX-51) hardware.
 
 	If unsure, say N.
 
+config CMT_SPEECH
+	tristate "CMT speech"
+	depends on HSI && SSI_PROTOCOL
+	help
+	If you say Y here, you will enable the CMT speech protocol used
+	by Nokia modems. If you say M the protocol will be available as
+	module named cmt_speech.
+
+	If unsure, say N.
+
 config SSI_PROTOCOL
 	tristate "SSI protocol"
 	depends on HSI && PHONET && OMAP_SSI
diff --git a/drivers/hsi/clients/Makefile b/drivers/hsi/clients/Makefile
index 4d5bc0e..2607232 100644
--- a/drivers/hsi/clients/Makefile
+++ b/drivers/hsi/clients/Makefile
@@ -4,4 +4,5 @@
 
 obj-$(CONFIG_NOKIA_MODEM)	+= nokia-modem.o
 obj-$(CONFIG_SSI_PROTOCOL)	+= ssi_protocol.o
+obj-$(CONFIG_CMT_SPEECH)	+= cmt_speech.o
 obj-$(CONFIG_HSI_CHAR)		+= hsi_char.o
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
new file mode 100644
index 0000000..4983529
--- /dev/null
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -0,0 +1,1457 @@
+/*
+ * cmt_speech.c - HSI CMT speech driver
+ *
+ * Copyright (C) 2008,2009,2010 Nokia Corporation. All rights reserved.
+ *
+ * Contact: Kai Vehmanen <kai.vehmanen@nokia.com>
+ * Original author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/ioctl.h>
+#include <linux/uaccess.h>
+#include <linux/pm_qos.h>
+#include <linux/hsi/hsi.h>
+#include <linux/hsi/ssi_protocol.h>
+#include <linux/hsi/cs-protocol.h>
+
+#define CS_MMAP_SIZE	PAGE_SIZE
+
+struct char_queue {
+	struct list_head	list;
+	u32			msg;
+};
+
+struct cs_char {
+	unsigned int		opened;
+	struct hsi_client	*cl;
+	struct cs_hsi_iface	*hi;
+	struct list_head	chardev_queue;
+	struct list_head	dataind_queue;
+	int			dataind_pending;
+	/* mmap things */
+	unsigned long		mmap_base;
+	unsigned long		mmap_size;
+	spinlock_t		lock;
+	struct fasync_struct	*async_queue;
+	wait_queue_head_t	wait;
+	/* hsi channel ids */
+	int                     channel_id_cmd;
+	int                     channel_id_data;
+};
+
+#define SSI_CHANNEL_STATE_READING	1
+#define SSI_CHANNEL_STATE_WRITING	(1 << 1)
+#define SSI_CHANNEL_STATE_POLL		(1 << 2)
+#define SSI_CHANNEL_STATE_ERROR		(1 << 3)
+
+#define TARGET_MASK			0xf000000
+#define TARGET_REMOTE			(1 << CS_DOMAIN_SHIFT)
+#define TARGET_LOCAL			0
+
+/* Number of pre-allocated commands buffers */
+#define CS_MAX_CMDS		        4
+
+/*
+ * During data transfers, transactions must be handled
+ * within 20ms (fixed value in cmtspeech HSI protocol)
+ */
+#define CS_QOS_LATENCY_FOR_DATA_USEC	20000
+
+/* Timeout to wait for pending HSI transfers to complete */
+#define CS_HSI_TRANSFER_TIMEOUT_MS      500
+
+
+#define RX_PTR_BOUNDARY_SHIFT		8
+#define RX_PTR_MAX_SHIFT		(RX_PTR_BOUNDARY_SHIFT + \
+						CS_MAX_BUFFERS_SHIFT)
+struct cs_hsi_iface {
+	struct hsi_client		*cl;
+	struct hsi_client		*master;
+
+	unsigned int			iface_state;
+	unsigned int			wakeline_state;
+	unsigned int			control_state;
+	unsigned int			data_state;
+
+	/* state exposed to application */
+	struct cs_mmap_config_block	*mmap_cfg;
+
+	unsigned long			mmap_base;
+	unsigned long			mmap_size;
+
+	unsigned int			rx_slot;
+	unsigned int			tx_slot;
+
+	/* note: for security reasons, we do not trust the contents of
+	 * mmap_cfg, but instead duplicate the variables here */
+	unsigned int			buf_size;
+	unsigned int			rx_bufs;
+	unsigned int			tx_bufs;
+	unsigned int			rx_ptr_boundary;
+	unsigned int			rx_offsets[CS_MAX_BUFFERS];
+	unsigned int			tx_offsets[CS_MAX_BUFFERS];
+
+	/* size of aligned memory blocks */
+	unsigned int			slot_size;
+	unsigned int			flags;
+
+	struct list_head		cmdqueue;
+
+	struct hsi_msg			*data_rx_msg;
+	struct hsi_msg			*data_tx_msg;
+	wait_queue_head_t		datawait;
+
+	struct pm_qos_request           pm_qos_req;
+
+	spinlock_t			lock;
+};
+
+static struct cs_char cs_char_data;
+
+static void cs_hsi_read_on_control(struct cs_hsi_iface *hi);
+static void cs_hsi_read_on_data(struct cs_hsi_iface *hi);
+
+static inline void rx_ptr_shift_too_big(void)
+{
+	BUILD_BUG_ON((1LLU << RX_PTR_MAX_SHIFT) > UINT_MAX);
+}
+
+static void cs_notify(u32 message, struct list_head *head)
+{
+	struct char_queue *entry;
+
+	spin_lock(&cs_char_data.lock);
+
+	if (!cs_char_data.opened) {
+		spin_unlock(&cs_char_data.lock);
+		goto out;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry) {
+		dev_err(&cs_char_data.cl->device,
+			"Can't allocate new entry for the queue.\n");
+		spin_unlock(&cs_char_data.lock);
+		goto out;
+	}
+
+	entry->msg = message;
+	list_add_tail(&entry->list, head);
+
+	spin_unlock(&cs_char_data.lock);
+
+	wake_up_interruptible(&cs_char_data.wait);
+	kill_fasync(&cs_char_data.async_queue, SIGIO, POLL_IN);
+
+out:
+	return;
+}
+
+static u32 cs_pop_entry(struct list_head *head)
+{
+	struct char_queue *entry;
+	u32 data;
+
+	entry = list_entry(head->next, struct char_queue, list);
+	data = entry->msg;
+	list_del(&entry->list);
+	kfree(entry);
+
+	return data;
+}
+
+static void cs_notify_control(u32 message)
+{
+	cs_notify(message, &cs_char_data.chardev_queue);
+}
+
+static void cs_notify_data(u32 message, int maxlength)
+{
+	cs_notify(message, &cs_char_data.dataind_queue);
+
+	spin_lock(&cs_char_data.lock);
+	cs_char_data.dataind_pending++;
+	while (cs_char_data.dataind_pending > maxlength &&
+				!list_empty(&cs_char_data.dataind_queue)) {
+		dev_dbg(&cs_char_data.cl->device, "data notification "
+		"queue overrun (%u entries)\n", cs_char_data.dataind_pending);
+
+		cs_pop_entry(&cs_char_data.dataind_queue);
+		cs_char_data.dataind_pending--;
+	}
+	spin_unlock(&cs_char_data.lock);
+}
+
+static inline void cs_set_cmd(struct hsi_msg *msg, u32 cmd)
+{
+	u32 *data = sg_virt(msg->sgt.sgl);
+	*data = cmd;
+}
+
+static inline u32 cs_get_cmd(struct hsi_msg *msg)
+{
+	u32 *data = sg_virt(msg->sgt.sgl);
+	return *data;
+}
+
+static void cs_release_cmd(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	list_add_tail(&msg->link, &hi->cmdqueue);
+}
+
+static void cs_cmd_destructor(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	spin_lock(&hi->lock);
+
+	dev_dbg(&cs_char_data.cl->device, "control cmd destructor\n");
+
+	if (hi->iface_state != CS_STATE_CLOSED)
+		dev_err(&hi->cl->device, "Cmd flushed while driver active\n");
+
+	if (msg->ttype == HSI_MSG_READ)
+		hi->control_state &=
+			~(SSI_CHANNEL_STATE_POLL | SSI_CHANNEL_STATE_READING);
+	else if (msg->ttype == HSI_MSG_WRITE &&
+			hi->control_state & SSI_CHANNEL_STATE_WRITING)
+		hi->control_state &= ~SSI_CHANNEL_STATE_WRITING;
+
+	cs_release_cmd(msg);
+
+	spin_unlock(&hi->lock);
+}
+
+static struct hsi_msg *cs_claim_cmd(struct cs_hsi_iface* ssi)
+{
+	struct hsi_msg *msg;
+
+	BUG_ON(list_empty(&ssi->cmdqueue));
+
+	msg = list_first_entry(&ssi->cmdqueue, struct hsi_msg, link);
+	list_del(&msg->link);
+	msg->destructor = cs_cmd_destructor;
+
+	return msg;
+}
+
+static void cs_free_cmds(struct cs_hsi_iface *ssi)
+{
+	struct hsi_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &ssi->cmdqueue, link) {
+		list_del(&msg->link);
+		msg->destructor = NULL;
+		kfree(sg_virt(msg->sgt.sgl));
+		hsi_free_msg(msg);
+	}
+}
+
+static int cs_alloc_cmds(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *msg;
+	u32 *buf;
+	unsigned int i;
+
+	INIT_LIST_HEAD(&hi->cmdqueue);
+
+	for (i = 0; i < CS_MAX_CMDS; i++) {
+		msg = hsi_alloc_msg(1, GFP_KERNEL);
+		if (!msg)
+			goto out;
+		buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+		if (!buf) {
+			hsi_free_msg(msg);
+			goto out;
+		}
+		sg_init_one(msg->sgt.sgl, buf, sizeof(*buf));
+		msg->channel = cs_char_data.channel_id_cmd;
+		msg->context = hi;
+		list_add_tail(&msg->link, &hi->cmdqueue);
+	}
+
+	return 0;
+
+out:
+	cs_free_cmds(hi);
+	return -ENOMEM;
+}
+
+static void cs_hsi_data_destructor(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	const char *dir = (msg->ttype == HSI_MSG_READ) ? "TX" : "RX";
+
+	dev_dbg(&cs_char_data.cl->device, "Freeing data %s message\n", dir);
+
+	spin_lock(&hi->lock);
+	if (hi->iface_state != CS_STATE_CLOSED)
+		dev_err(&cs_char_data.cl->device,
+				"Data %s flush while device active\n", dir);
+	if (msg->ttype == HSI_MSG_READ)
+		hi->data_state &=
+			~(SSI_CHANNEL_STATE_POLL | SSI_CHANNEL_STATE_READING);
+	else
+		hi->data_state &= ~SSI_CHANNEL_STATE_WRITING;
+
+	msg->status = HSI_STATUS_COMPLETED;
+	if (unlikely(waitqueue_active(&hi->datawait)))
+		wake_up_interruptible(&hi->datawait);
+
+	spin_unlock(&hi->lock);
+}
+
+static int cs_hsi_alloc_data(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *txmsg, *rxmsg;
+	int res = 0;
+
+	rxmsg = hsi_alloc_msg(1, GFP_KERNEL);
+	if (!rxmsg) {
+		res = -ENOMEM;
+		goto out1;
+	}
+	rxmsg->channel = cs_char_data.channel_id_data;
+	rxmsg->destructor = cs_hsi_data_destructor;
+	rxmsg->context = hi;
+
+	txmsg = hsi_alloc_msg(1, GFP_KERNEL);
+	if (!txmsg) {
+		res = -ENOMEM;
+		goto out2;
+	}
+	txmsg->channel = cs_char_data.channel_id_data;
+	txmsg->destructor = cs_hsi_data_destructor;
+	txmsg->context = hi;
+
+	hi->data_rx_msg = rxmsg;
+	hi->data_tx_msg = txmsg;
+
+	return 0;
+
+out2:
+	hsi_free_msg(rxmsg);
+out1:
+	return res;
+}
+
+static void cs_hsi_free_data_msg(struct hsi_msg *msg)
+{
+	WARN_ON(msg->status != HSI_STATUS_COMPLETED &&
+					msg->status != HSI_STATUS_ERROR);
+	hsi_free_msg(msg);
+}
+
+static void cs_hsi_free_data(struct cs_hsi_iface *hi)
+{
+	cs_hsi_free_data_msg(hi->data_rx_msg);
+	cs_hsi_free_data_msg(hi->data_tx_msg);
+}
+
+static inline void __cs_hsi_error_pre(struct cs_hsi_iface *hi,
+					struct hsi_msg *msg, const char *info,
+					unsigned int *state)
+{
+	spin_lock(&hi->lock);
+	dev_err(&hi->cl->device, "HSI %s error, msg %d, state %u\n",
+		info, msg->status, *state);
+}
+
+static inline void __cs_hsi_error_post(struct cs_hsi_iface *hi)
+{
+	spin_unlock(&hi->lock);
+}
+
+static inline void __cs_hsi_error_read_bits(unsigned int *state)
+{
+	*state |= SSI_CHANNEL_STATE_ERROR;
+	*state &= ~(SSI_CHANNEL_STATE_READING | SSI_CHANNEL_STATE_POLL);
+}
+
+static inline void __cs_hsi_error_write_bits(unsigned int *state)
+{
+	*state |= SSI_CHANNEL_STATE_ERROR;
+	*state &= ~SSI_CHANNEL_STATE_WRITING;
+}
+
+static void cs_hsi_control_read_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "control read", &hi->control_state);
+	cs_release_cmd(msg);
+	__cs_hsi_error_read_bits(&hi->control_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_control_write_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "control write", &hi->control_state);
+	cs_release_cmd(msg);
+	__cs_hsi_error_write_bits(&hi->control_state);
+	__cs_hsi_error_post(hi);
+
+}
+
+static void cs_hsi_data_read_error(struct cs_hsi_iface *hi, struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "data read", &hi->data_state);
+	__cs_hsi_error_read_bits(&hi->data_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_data_write_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "data write", &hi->data_state);
+	__cs_hsi_error_write_bits(&hi->data_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_read_on_control_complete(struct hsi_msg *msg)
+{
+	u32 cmd = cs_get_cmd(msg);
+	struct cs_hsi_iface *hi = msg->context;
+
+	spin_lock(&hi->lock);
+	hi->control_state &= ~SSI_CHANNEL_STATE_READING;
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&hi->cl->device, "Control RX error detected\n");
+		cs_hsi_control_read_error(hi, msg);
+		spin_unlock(&hi->lock);
+		goto out;
+	}
+	dev_dbg(&hi->cl->device, "Read on control: %08X\n", cmd);
+	cs_release_cmd(msg);
+	if (hi->flags & CS_FEAT_TSTAMP_RX_CTRL) {
+		struct timespec *tstamp =
+			&hi->mmap_cfg->tstamp_rx_ctrl;
+		do_posix_clock_monotonic_gettime(tstamp);
+	}
+	spin_unlock(&hi->lock);
+
+	cs_notify_control(cmd);
+
+out:
+	cs_hsi_read_on_control(hi);
+}
+
+static void cs_hsi_peek_on_control_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	int ret;
+
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&hi->cl->device, "Control peek RX error detected\n");
+		cs_hsi_control_read_error(hi, msg);
+		return;
+	}
+
+	WARN_ON(!(hi->control_state & SSI_CHANNEL_STATE_READING));
+
+	dev_dbg(&hi->cl->device, "Peek on control complete, reading\n");
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_read_on_control_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_control_read_error(hi, msg);
+}
+
+static void cs_hsi_read_on_control(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *msg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->control_state & SSI_CHANNEL_STATE_READING) {
+		dev_err(&hi->cl->device, "Control read already pending (%d)\n",
+			hi->control_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	if (hi->control_state & SSI_CHANNEL_STATE_ERROR) {
+		dev_err(&hi->cl->device, "Control read error (%d)\n",
+			hi->control_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	hi->control_state |= SSI_CHANNEL_STATE_READING;
+	dev_dbg(&hi->cl->device, "Issuing RX on control\n");
+	msg = cs_claim_cmd(hi);
+	spin_unlock(&hi->lock);
+
+	msg->sgt.nents = 0;
+	msg->complete = cs_hsi_peek_on_control_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_control_read_error(hi, msg);
+}
+
+static void cs_hsi_write_on_control_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	if (msg->status == HSI_STATUS_COMPLETED) {
+		spin_lock(&hi->lock);
+		hi->control_state &= ~SSI_CHANNEL_STATE_WRITING;
+		cs_release_cmd(msg);
+		spin_unlock(&hi->lock);
+	} else if (msg->status == HSI_STATUS_ERROR) {
+		cs_hsi_control_write_error(hi, msg);
+	} else {
+		dev_err(&hi->cl->device,
+			"unexpected status in control write callback %d\n",
+			msg->status);
+	}
+}
+
+static int cs_hsi_write_on_control(struct cs_hsi_iface *hi, u32 message)
+{
+	struct hsi_msg *msg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->control_state & SSI_CHANNEL_STATE_ERROR) {
+		spin_unlock(&hi->lock);
+		return -EIO;
+	}
+	if (hi->control_state & SSI_CHANNEL_STATE_WRITING) {
+		dev_err(&hi->cl->device,
+			"Write still pending on control channel.\n");
+		spin_unlock(&hi->lock);
+		return -EBUSY;
+	}
+	hi->control_state |= SSI_CHANNEL_STATE_WRITING;
+	msg = cs_claim_cmd(hi);
+	spin_unlock(&hi->lock);
+
+	cs_set_cmd(msg, message);
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_write_on_control_complete;
+	dev_dbg(&hi->cl->device,
+		"Sending control message %08X\n", message);
+	ret = hsi_async_write(hi->cl, msg);
+	if (ret) {
+		dev_err(&hi->cl->device,
+			"async_write failed with %d\n", ret);
+		cs_hsi_control_write_error(hi, msg);
+	}
+
+	/*
+	 * Make sure control read is always pending when issuing
+	 * new control writes. This is needed as the controller
+	 * may flush our messages if e.g. the peer device reboots
+	 * unexpectedly (and we cannot directly resubmit a new read from
+	 * the message destructor; see cs_cmd_destructor()).
+	 */
+	if (!(hi->control_state & SSI_CHANNEL_STATE_READING)) {
+		dev_err(&hi->cl->device, "Restarting control reads\n");
+		cs_hsi_read_on_control(hi);
+	}
+
+	return 0;
+}
+
+static void cs_hsi_read_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	u32 payload;
+
+	if (unlikely(msg->status == HSI_STATUS_ERROR)) {
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+
+	spin_lock(&hi->lock);
+	WARN_ON(!(hi->data_state & SSI_CHANNEL_STATE_READING));
+	hi->data_state &= ~SSI_CHANNEL_STATE_READING;
+	payload = CS_RX_DATA_RECEIVED;
+	payload |= hi->rx_slot;
+	hi->rx_slot++;
+	hi->rx_slot %= hi->rx_ptr_boundary;
+	/* expose current rx ptr in mmap area */
+	hi->mmap_cfg->rx_ptr = hi->rx_slot;
+	if (unlikely(waitqueue_active(&hi->datawait)))
+		wake_up_interruptible(&hi->datawait);
+	spin_unlock(&hi->lock);
+
+	cs_notify_data(payload, hi->rx_bufs);
+	cs_hsi_read_on_data(hi);
+}
+
+static void cs_hsi_peek_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	u32 *address;
+	int ret;
+
+	if (unlikely(msg->status == HSI_STATUS_ERROR)) {
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+	if (unlikely(hi->iface_state != CS_STATE_CONFIGURED)) {
+		dev_err(&hi->cl->device, "Data received in invalid state\n");
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+
+	spin_lock(&hi->lock);
+	WARN_ON(!(hi->data_state & SSI_CHANNEL_STATE_POLL));
+	hi->data_state &= ~SSI_CHANNEL_STATE_POLL;
+	hi->data_state |= SSI_CHANNEL_STATE_READING;
+	spin_unlock(&hi->lock);
+
+	address = (u32 *)(hi->mmap_base +
+				hi->rx_offsets[hi->rx_slot % hi->rx_bufs]);
+	sg_init_one(msg->sgt.sgl, address, hi->buf_size);
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_read_on_data_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_data_read_error(hi, msg);
+}
+
+/*
+ * Read/write transaction is ongoing. Returns false if in
+ * SSI_CHANNEL_STATE_POLL state.
+ */
+static inline int cs_state_xfer_active(unsigned int state)
+{
+	return (state & SSI_CHANNEL_STATE_WRITING) ||
+		(state & SSI_CHANNEL_STATE_READING);
+}
+
+/*
+ * No pending read/writes
+ */
+static inline int cs_state_idle(unsigned int state)
+{
+	return !(state & ~SSI_CHANNEL_STATE_ERROR);
+}
+
+static void cs_hsi_read_on_data(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *rxmsg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->data_state &
+		(SSI_CHANNEL_STATE_READING | SSI_CHANNEL_STATE_POLL)) {
+		dev_dbg(&hi->cl->device, "Data read already pending (%u)\n",
+			hi->data_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	hi->data_state |= SSI_CHANNEL_STATE_POLL;
+	spin_unlock(&hi->lock);
+
+	rxmsg = hi->data_rx_msg;
+	sg_init_one(rxmsg->sgt.sgl, (void *)hi->mmap_base, 0);
+	rxmsg->sgt.nents = 0;
+	rxmsg->complete = cs_hsi_peek_on_data_complete;
+
+	ret = hsi_async_read(hi->cl, rxmsg);
+	if (ret)
+		cs_hsi_data_read_error(hi, rxmsg);
+}
+
+static void cs_hsi_write_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	if (msg->status == HSI_STATUS_COMPLETED) {
+		spin_lock(&hi->lock);
+		hi->data_state &= ~SSI_CHANNEL_STATE_WRITING;
+		if (unlikely(waitqueue_active(&hi->datawait)))
+			wake_up_interruptible(&hi->datawait);
+		spin_unlock(&hi->lock);
+	} else {
+		cs_hsi_data_write_error(hi, msg);
+	}
+}
+
+static int cs_hsi_write_on_data(struct cs_hsi_iface *hi, unsigned int slot)
+{
+	u32 *address;
+	struct hsi_msg *txmsg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->iface_state != CS_STATE_CONFIGURED) {
+		dev_err(&hi->cl->device, "Not configured, aborting\n");
+		ret = -EINVAL;
+		goto error;
+	}
+	if (hi->data_state & SSI_CHANNEL_STATE_ERROR) {
+		dev_err(&hi->cl->device, "HSI error, aborting\n");
+		ret = -EIO;
+		goto error;
+	}
+	if (hi->data_state & SSI_CHANNEL_STATE_WRITING) {
+		dev_err(&hi->cl->device, "Write pending on data channel.\n");
+		ret = -EBUSY;
+		goto error;
+	}
+	hi->data_state |= SSI_CHANNEL_STATE_WRITING;
+	spin_unlock(&hi->lock);
+
+	hi->tx_slot = slot;
+	address = (u32 *)(hi->mmap_base + hi->tx_offsets[hi->tx_slot]);
+	txmsg = hi->data_tx_msg;
+	sg_init_one(txmsg->sgt.sgl, address, hi->buf_size);
+	txmsg->complete = cs_hsi_write_on_data_complete;
+	ret = hsi_async_write(hi->cl, txmsg);
+	if (ret)
+		cs_hsi_data_write_error(hi, txmsg);
+
+	return ret;
+
+error:
+	spin_unlock(&hi->lock);
+	if (ret == -EIO)
+		cs_hsi_data_write_error(hi, hi->data_tx_msg);
+
+	return ret;
+}
+
+static unsigned int cs_hsi_get_state(struct cs_hsi_iface *hi)
+{
+	return hi->iface_state;
+}
+
+static int cs_hsi_command(struct cs_hsi_iface *hi, u32 cmd)
+{
+	int ret = 0;
+
+	local_bh_disable();
+	switch (cmd & TARGET_MASK) {
+	case TARGET_REMOTE:
+		ret = cs_hsi_write_on_control(hi, cmd);
+		break;
+	case TARGET_LOCAL:
+		if ((cmd & CS_CMD_MASK) == CS_TX_DATA_READY)
+			ret = cs_hsi_write_on_data(hi, cmd & CS_PARAM_MASK);
+		else
+			ret = -EINVAL;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	local_bh_enable();
+
+	return ret;
+}
+
+static void cs_hsi_set_wakeline(struct cs_hsi_iface *hi, bool new_state)
+{
+	int change = 0;
+
+	spin_lock_bh(&hi->lock);
+	if (hi->wakeline_state != new_state) {
+		hi->wakeline_state = new_state;
+		change = 1;
+		dev_dbg(&hi->cl->device, "setting wake line to %d (%p)\n",
+			new_state, hi->cl);
+	}
+	spin_unlock_bh(&hi->lock);
+
+	if (change) {
+		if (new_state)
+			ssip_slave_start_tx(hi->master);
+		else
+			ssip_slave_stop_tx(hi->master);
+	}
+
+	dev_dbg(&hi->cl->device, "wake line set to %d (%p)\n",
+		new_state, hi->cl);
+}
+
+static void set_buffer_sizes(struct cs_hsi_iface *hi, int rx_bufs, int tx_bufs)
+{
+	hi->rx_bufs = rx_bufs;
+	hi->tx_bufs = tx_bufs;
+	hi->mmap_cfg->rx_bufs = rx_bufs;
+	hi->mmap_cfg->tx_bufs = tx_bufs;
+
+	if (hi->flags & CS_FEAT_ROLLING_RX_COUNTER) {
+		/*
+		 * For more robust overrun detection, let the rx
+		 * pointer run in range 0..'boundary-1'. Boundary
+		 * is a multiple of rx_bufs, and limited in max size
+		 * by RX_PTR_MAX_SHIFT to allow for fast ptr-diff
+		 * calculation.
+		 */
+		hi->rx_ptr_boundary = (rx_bufs << RX_PTR_BOUNDARY_SHIFT);
+		hi->mmap_cfg->rx_ptr_boundary = hi->rx_ptr_boundary;
+	} else {
+		hi->rx_ptr_boundary = hi->rx_bufs;
+	}
+}
+
+static int check_buf_params(struct cs_hsi_iface *hi,
+					const struct cs_buffer_config *buf_cfg)
+{
+	size_t buf_size_aligned = L1_CACHE_ALIGN(buf_cfg->buf_size) *
+					(buf_cfg->rx_bufs + buf_cfg->tx_bufs);
+	size_t ctrl_size_aligned = L1_CACHE_ALIGN(sizeof(*hi->mmap_cfg));
+	int r = 0;
+
+	if (buf_cfg->rx_bufs > CS_MAX_BUFFERS ||
+					buf_cfg->tx_bufs > CS_MAX_BUFFERS) {
+		r = -EINVAL;
+	} else if ((buf_size_aligned + ctrl_size_aligned) >= hi->mmap_size) {
+		dev_err(&hi->cl->device, "No space for the requested buffer "
+			"configuration\n");
+		r = -ENOBUFS;
+	}
+
+	return r;
+}
+
+/**
+ * Block until pending data transfers have completed.
+ */
+static int cs_hsi_data_sync(struct cs_hsi_iface *hi)
+{
+	int r = 0;
+
+	spin_lock_bh(&hi->lock);
+
+	if (!cs_state_xfer_active(hi->data_state)) {
+		dev_dbg(&hi->cl->device, "hsi_data_sync break, idle\n");
+		goto out;
+	}
+
+	for (;;) {
+		int s;
+		DEFINE_WAIT(wait);
+		if (!cs_state_xfer_active(hi->data_state))
+			goto out;
+		if (signal_pending(current)) {
+			r = -ERESTARTSYS;
+			goto out;
+		}
+		/**
+		 * prepare_to_wait must be called with hi->lock held
+		 * so that callbacks can check for waitqueue_active()
+		 */
+		prepare_to_wait(&hi->datawait, &wait, TASK_INTERRUPTIBLE);
+		spin_unlock_bh(&hi->lock);
+		s = schedule_timeout(
+			msecs_to_jiffies(CS_HSI_TRANSFER_TIMEOUT_MS));
+		spin_lock_bh(&hi->lock);
+		finish_wait(&hi->datawait, &wait);
+		if (!s) {
+			dev_dbg(&hi->cl->device,
+				"hsi_data_sync timeout after %d ms\n",
+				CS_HSI_TRANSFER_TIMEOUT_MS);
+			r = -EIO;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock_bh(&hi->lock);
+	dev_dbg(&hi->cl->device, "hsi_data_sync done with res %d\n", r);
+
+	return r;
+}
+
+static void cs_hsi_data_enable(struct cs_hsi_iface *hi,
+					struct cs_buffer_config *buf_cfg)
+{
+	unsigned int data_start, i;
+
+	BUG_ON(hi->buf_size == 0);
+
+	set_buffer_sizes(hi, buf_cfg->rx_bufs, buf_cfg->tx_bufs);
+
+	hi->slot_size = L1_CACHE_ALIGN(hi->buf_size);
+	dev_dbg(&hi->cl->device,
+			"setting slot size to %u, buf size %u, align %u\n",
+			hi->slot_size, hi->buf_size, L1_CACHE_BYTES);
+
+	data_start = L1_CACHE_ALIGN(sizeof(*hi->mmap_cfg));
+	dev_dbg(&hi->cl->device,
+			"setting data start at %u, cfg block %u, align %u\n",
+			data_start, sizeof(*hi->mmap_cfg), L1_CACHE_BYTES);
+
+	for (i = 0; i < hi->mmap_cfg->rx_bufs; i++) {
+		hi->rx_offsets[i] = data_start + i * hi->slot_size;
+		hi->mmap_cfg->rx_offsets[i] = hi->rx_offsets[i];
+		dev_dbg(&hi->cl->device, "DL buf #%u at %u\n",
+					i, hi->rx_offsets[i]);
+	}
+	for (i = 0; i < hi->mmap_cfg->tx_bufs; i++) {
+		hi->tx_offsets[i] = data_start +
+			(i + hi->mmap_cfg->rx_bufs) * hi->slot_size;
+		hi->mmap_cfg->tx_offsets[i] = hi->tx_offsets[i];
+		dev_dbg(&hi->cl->device, "UL buf #%u at %u\n",
+					i, hi->rx_offsets[i]);
+	}
+
+	hi->iface_state = CS_STATE_CONFIGURED;
+}
+
+static void cs_hsi_data_disable(struct cs_hsi_iface *hi, int old_state)
+{
+	if (old_state == CS_STATE_CONFIGURED) {
+		dev_dbg(&hi->cl->device,
+			"closing data channel with slot size 0\n");
+		hi->iface_state = CS_STATE_OPENED;
+	}
+}
+
+static int cs_hsi_buf_config(struct cs_hsi_iface *hi,
+					struct cs_buffer_config *buf_cfg)
+{
+	int r = 0;
+	unsigned int old_state = hi->iface_state;
+
+	spin_lock_bh(&hi->lock);
+	/* Prevent new transactions during buffer reconfig */
+	if (old_state == CS_STATE_CONFIGURED)
+		hi->iface_state = CS_STATE_OPENED;
+	spin_unlock_bh(&hi->lock);
+
+	/*
+	 * make sure that no non-zero data reads are ongoing before
+	 * proceeding to change the buffer layout
+	 */
+	r = cs_hsi_data_sync(hi);
+	if (r < 0)
+		return r;
+
+	WARN_ON(cs_state_xfer_active(hi->data_state));
+
+	spin_lock_bh(&hi->lock);
+	r = check_buf_params(hi, buf_cfg);
+	if (r < 0)
+		goto error;
+
+	hi->buf_size = buf_cfg->buf_size;
+	hi->mmap_cfg->buf_size = hi->buf_size;
+	hi->flags = buf_cfg->flags;
+
+	hi->rx_slot = 0;
+	hi->tx_slot = 0;
+	hi->slot_size = 0;
+
+	if (hi->buf_size)
+		cs_hsi_data_enable(hi, buf_cfg);
+	else
+		cs_hsi_data_disable(hi, old_state);
+
+	spin_unlock_bh(&hi->lock);
+
+	if (old_state != hi->iface_state) {
+		if (hi->iface_state == CS_STATE_CONFIGURED) {
+			pm_qos_add_request(&hi->pm_qos_req,
+				PM_QOS_CPU_DMA_LATENCY,
+				CS_QOS_LATENCY_FOR_DATA_USEC);
+			local_bh_disable();
+			cs_hsi_read_on_data(hi);
+			local_bh_enable();
+		} else if (old_state == CS_STATE_CONFIGURED) {
+			pm_qos_remove_request(&hi->pm_qos_req);
+		}
+	}
+	return r;
+
+error:
+	spin_unlock_bh(&hi->lock);
+	return r;
+}
+
+static int cs_hsi_start(struct cs_hsi_iface **hi, struct hsi_client *cl,
+			unsigned long mmap_base, unsigned long mmap_size)
+{
+	int err = 0;
+	struct cs_hsi_iface *hsi_if = kzalloc(sizeof(*hsi_if), GFP_KERNEL);
+
+	dev_dbg(&cl->device, "cs_hsi_start\n");
+
+	if (!hsi_if) {
+		err = -ENOMEM;
+		goto leave0;
+	}
+	spin_lock_init(&hsi_if->lock);
+	hsi_if->cl = cl;
+	hsi_if->iface_state = CS_STATE_CLOSED;
+	hsi_if->mmap_cfg = (struct cs_mmap_config_block *)mmap_base;
+	hsi_if->mmap_base = mmap_base;
+	hsi_if->mmap_size = mmap_size;
+	memset(hsi_if->mmap_cfg, 0, sizeof(*hsi_if->mmap_cfg));
+	init_waitqueue_head(&hsi_if->datawait);
+	err = cs_alloc_cmds(hsi_if);
+	if (err < 0) {
+		dev_err(&cl->device, "Unable to alloc HSI messages\n");
+		goto leave1;
+	}
+	err = cs_hsi_alloc_data(hsi_if);
+	if (err < 0) {
+		dev_err(&cl->device, "Unable to alloc HSI messages for data\n");
+		goto leave2;
+	}
+	err = hsi_claim_port(cl, 1);
+	if (err < 0) {
+		dev_err(&cl->device,
+				"Could not open, HSI port already claimed\n");
+		goto leave3;
+	}
+	hsi_if->master = ssip_slave_get_master(cl);
+	if (IS_ERR(hsi_if->master)) {
+		err = PTR_ERR(hsi_if->master);
+		dev_err(&cl->device, "Could not get HSI master client\n");
+		goto leave4;
+	}
+	if (!ssip_slave_running(hsi_if->master)) {
+		err = -ENODEV;
+		dev_err(&cl->device,
+				"HSI port not initialized\n");
+		goto leave4;
+	}
+
+	hsi_if->iface_state = CS_STATE_OPENED;
+	local_bh_disable();
+	cs_hsi_read_on_control(hsi_if);
+	local_bh_enable();
+
+	dev_dbg(&cl->device, "cs_hsi_start...done\n");
+
+	BUG_ON(!hi);
+	*hi = hsi_if;
+
+	return 0;
+
+leave4:
+	hsi_release_port(cl);
+leave3:
+	cs_hsi_free_data(hsi_if);
+leave2:
+	cs_free_cmds(hsi_if);
+leave1:
+	kfree(hsi_if);
+leave0:
+	dev_dbg(&cl->device, "cs_hsi_start...done/error\n\n");
+
+	return err;
+}
+
+static void cs_hsi_stop(struct cs_hsi_iface *hi)
+{
+	dev_dbg(&hi->cl->device, "cs_hsi_stop\n");
+	cs_hsi_set_wakeline(hi, 0);
+	ssip_slave_put_master(hi->master);
+
+	/* hsi_release_port() needs to be called with CS_STATE_CLOSED */
+	hi->iface_state = CS_STATE_CLOSED;
+	hsi_release_port(hi->cl);
+
+	/*
+	 * hsi_release_port() should flush out all the pending
+	 * messages, so cs_state_idle() should be true for both
+	 * control and data channels.
+	 */
+	WARN_ON(!cs_state_idle(hi->control_state));
+	WARN_ON(!cs_state_idle(hi->data_state));
+
+	if (pm_qos_request_active(&hi->pm_qos_req))
+		pm_qos_remove_request(&hi->pm_qos_req);
+
+	spin_lock_bh(&hi->lock);
+	cs_hsi_free_data(hi);
+	cs_free_cmds(hi);
+	spin_unlock_bh(&hi->lock);
+	kfree(hi);
+}
+
+static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct cs_char *csdata = vma->vm_private_data;
+	struct page *page;
+
+	page = virt_to_page(csdata->mmap_base);
+	get_page(page);
+	vmf->page = page;
+
+	return 0;
+}
+
+static struct vm_operations_struct cs_char_vm_ops = {
+	.fault	= cs_char_vma_fault,
+};
+
+static int cs_char_fasync(int fd, struct file *file, int on)
+{
+	struct cs_char *csdata = file->private_data;
+
+	if (fasync_helper(fd, file, on, &csdata->async_queue) < 0)
+		return -EIO;
+
+	return 0;
+}
+
+static unsigned int cs_char_poll(struct file *file, poll_table *wait)
+{
+	struct cs_char *csdata = file->private_data;
+	unsigned int ret = 0;
+
+	poll_wait(file, &cs_char_data.wait, wait);
+	spin_lock_bh(&csdata->lock);
+	if (!list_empty(&csdata->chardev_queue))
+		ret = POLLIN | POLLRDNORM;
+	else if (!list_empty(&csdata->dataind_queue))
+		ret = POLLIN | POLLRDNORM;
+	spin_unlock_bh(&csdata->lock);
+
+	return ret;
+}
+
+static ssize_t cs_char_read(struct file *file, char __user *buf, size_t count,
+								loff_t *unused)
+{
+	struct cs_char *csdata = file->private_data;
+	u32 data;
+	ssize_t retval;
+
+	if (count < sizeof(data))
+		return -EINVAL;
+
+	for (;;) {
+		DEFINE_WAIT(wait);
+
+		spin_lock_bh(&csdata->lock);
+		if (!list_empty(&csdata->chardev_queue)) {
+			data = cs_pop_entry(&csdata->chardev_queue);
+		} else if (!list_empty(&csdata->dataind_queue)) {
+			data = cs_pop_entry(&csdata->dataind_queue);
+			csdata->dataind_pending--;
+		} else {
+			data = 0;
+		}
+		spin_unlock_bh(&csdata->lock);
+
+		if (data)
+			break;
+		if (file->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto out;
+		} else if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto out;
+		}
+		prepare_to_wait_exclusive(&csdata->wait, &wait,
+						TASK_INTERRUPTIBLE);
+		schedule();
+		finish_wait(&csdata->wait, &wait);
+	}
+
+	retval = put_user(data, (u32 __user *)buf);
+	if (!retval)
+		retval = sizeof(data);
+
+out:
+	return retval;
+}
+
+static ssize_t cs_char_write(struct file *file, const char __user *buf,
+						size_t count, loff_t *unused)
+{
+	struct cs_char *csdata = file->private_data;
+	u32 data;
+	int err;
+	ssize_t	retval;
+
+	if (count < sizeof(data))
+		return -EINVAL;
+
+	if (get_user(data, (u32 __user *)buf))
+		retval = -EFAULT;
+	else
+		retval = count;
+
+	err = cs_hsi_command(csdata->hi, data);
+	if (err < 0)
+		retval = err;
+
+	return retval;
+}
+
+static long cs_char_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	struct cs_char *csdata = file->private_data;
+	int r = 0;
+
+	switch (cmd) {
+	case CS_GET_STATE: {
+		unsigned int state;
+
+		state = cs_hsi_get_state(csdata->hi);
+		if (copy_to_user((void __user *)arg, &state, sizeof(state)))
+			r = -EFAULT;
+
+		break;
+	}
+	case CS_SET_WAKELINE: {
+		unsigned int state;
+
+		if (copy_from_user(&state, (void __user *)arg, sizeof(state))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (state > 1) {
+			r = -EINVAL;
+			break;
+		}
+
+		cs_hsi_set_wakeline(csdata->hi, !!state);
+
+		break;
+	}
+	case CS_GET_IF_VERSION: {
+		unsigned int ifver = CS_IF_VERSION;
+
+		if (copy_to_user((void __user *)arg, &ifver, sizeof(ifver)))
+			r = -EFAULT;
+
+		break;
+	}
+	case CS_CONFIG_BUFS: {
+		struct cs_buffer_config buf_cfg;
+
+		if (copy_from_user(&buf_cfg, (void __user *)arg,
+							sizeof(buf_cfg)))
+			r = -EFAULT;
+		else
+			r = cs_hsi_buf_config(csdata->hi, &buf_cfg);
+
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
+	}
+
+	return r;
+}
+
+static int cs_char_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+
+	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1)
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_DONTDUMP | VM_DONTEXPAND;
+	vma->vm_ops = &cs_char_vm_ops;
+	vma->vm_private_data = file->private_data;
+
+	return 0;
+}
+
+static int cs_char_open(struct inode *unused, struct file *file)
+{
+	int ret = 0;
+	unsigned long p;
+
+	spin_lock_bh(&cs_char_data.lock);
+	if (cs_char_data.opened) {
+		ret = -EBUSY;
+		spin_unlock_bh(&cs_char_data.lock);
+		goto out1;
+	}
+	cs_char_data.opened = 1;
+	cs_char_data.dataind_pending = 0;
+	spin_unlock_bh(&cs_char_data.lock);
+
+	p = get_zeroed_page(GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto out2;
+	}
+
+	ret = cs_hsi_start(&cs_char_data.hi, cs_char_data.cl, p, CS_MMAP_SIZE);
+	if (ret) {
+		dev_err(&cs_char_data.cl->device, "Unable to initialize HSI\n");
+		goto out3;
+	}
+
+	/* these are only used in release so lock not needed */
+	cs_char_data.mmap_base = p;
+	cs_char_data.mmap_size = CS_MMAP_SIZE;
+
+	file->private_data = &cs_char_data;
+
+	return 0;
+
+out3:
+	free_page(p);
+out2:
+	spin_lock_bh(&cs_char_data.lock);
+	cs_char_data.opened = 0;
+	spin_unlock_bh(&cs_char_data.lock);
+out1:
+	return ret;
+}
+
+static void cs_free_char_queue(struct list_head *head)
+{
+	struct char_queue *entry;
+	struct list_head *cursor, *next;
+
+	if (!list_empty(head)) {
+		list_for_each_safe(cursor, next, head) {
+			entry = list_entry(cursor, struct char_queue, list);
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+
+}
+
+static int cs_char_release(struct inode *unused, struct file *file)
+{
+	struct cs_char *csdata = file->private_data;
+
+	cs_hsi_stop(csdata->hi);
+	spin_lock_bh(&csdata->lock);
+	csdata->hi = NULL;
+	free_page(csdata->mmap_base);
+	cs_free_char_queue(&csdata->chardev_queue);
+	cs_free_char_queue(&csdata->dataind_queue);
+	csdata->opened = 0;
+	spin_unlock_bh(&csdata->lock);
+
+	return 0;
+}
+
+static const struct file_operations cs_char_fops = {
+	.owner		= THIS_MODULE,
+	.read		= cs_char_read,
+	.write		= cs_char_write,
+	.poll		= cs_char_poll,
+	.unlocked_ioctl	= cs_char_ioctl,
+	.mmap		= cs_char_mmap,
+	.open		= cs_char_open,
+	.release	= cs_char_release,
+	.fasync		= cs_char_fasync,
+};
+
+static struct miscdevice cs_char_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "cmt_speech",
+	.fops	= &cs_char_fops
+};
+
+static int cs_hsi_client_probe(struct device *dev)
+{
+	int err = 0;
+	struct hsi_client *cl = to_hsi_client(dev);
+
+	dev_dbg(dev, "hsi_client_probe\n");
+	init_waitqueue_head(&cs_char_data.wait);
+	spin_lock_init(&cs_char_data.lock);
+	cs_char_data.opened = 0;
+	cs_char_data.cl = cl;
+	cs_char_data.hi = NULL;
+	INIT_LIST_HEAD(&cs_char_data.chardev_queue);
+	INIT_LIST_HEAD(&cs_char_data.dataind_queue);
+
+	cs_char_data.channel_id_cmd = hsi_get_channel_id_by_name(cl,
+		"speech-control");
+	if (cs_char_data.channel_id_cmd < 0) {
+		err = cs_char_data.channel_id_cmd;
+		dev_err(dev, "Could not get cmd channel (%d)\n", err);
+		return err;
+	}
+
+	cs_char_data.channel_id_data = hsi_get_channel_id_by_name(cl,
+		"speech-data");
+	if (cs_char_data.channel_id_data < 0) {
+		err = cs_char_data.channel_id_data;
+		dev_err(dev, "Could not get data channel (%d)\n", err);
+		return err;
+	}
+
+	err = misc_register(&cs_char_miscdev);
+	if (err)
+		dev_err(dev, "Failed to register: %d\n", err);
+
+	return err;
+}
+
+static int cs_hsi_client_remove(struct device *dev)
+{
+	struct cs_hsi_iface *hi;
+
+	dev_dbg(dev, "hsi_client_remove\n");
+	misc_deregister(&cs_char_miscdev);
+	spin_lock_bh(&cs_char_data.lock);
+	hi = cs_char_data.hi;
+	cs_char_data.hi = NULL;
+	spin_unlock_bh(&cs_char_data.lock);
+	if (hi)
+		cs_hsi_stop(hi);
+
+	return 0;
+}
+
+static struct hsi_client_driver cs_hsi_driver = {
+	.driver = {
+		.name	= "cmt-speech",
+		.owner	= THIS_MODULE,
+		.probe	= cs_hsi_client_probe,
+		.remove	= cs_hsi_client_remove,
+	},
+};
+
+static int __init cs_char_init(void)
+{
+	pr_info("CMT speech driver added\n");
+	return hsi_register_client_driver(&cs_hsi_driver);
+}
+module_init(cs_char_init);
+
+static void __exit cs_char_exit(void)
+{
+	hsi_unregister_client_driver(&cs_hsi_driver);
+	pr_info("CMT speech driver removed\n");
+}
+module_exit(cs_char_exit);
+
+MODULE_ALIAS("hsi:cmt-speech");
+MODULE_AUTHOR("Kai Vehmanen <kai.vehmanen@nokia.com>");
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@nokia.com>");
+MODULE_DESCRIPTION("CMT speech driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hsi/clients/nokia-modem.c b/drivers/hsi/clients/nokia-modem.c
index eb4dc63..bbb1923 100644
--- a/drivers/hsi/clients/nokia-modem.c
+++ b/drivers/hsi/clients/nokia-modem.c
@@ -46,6 +46,7 @@
 	struct nokia_modem_gpio	*gpios;
 	int			gpio_amount;
 	struct hsi_client	*ssi_protocol;
+	struct hsi_client	*cmt_speech;
 };
 
 static void do_nokia_modem_rst_ind_tasklet(unsigned long data)
@@ -149,6 +150,7 @@
 	struct hsi_port *port = hsi_get_port(cl);
 	int irq, pflags, err;
 	struct hsi_board_info ssip;
+	struct hsi_board_info cmtspeech;
 
 	np = dev->of_node;
 	if (!np) {
@@ -200,6 +202,7 @@
 	modem->ssi_protocol = hsi_new_client(port, &ssip);
 	if (!modem->ssi_protocol) {
 		dev_err(dev, "Could not register ssi-protocol device\n");
+		err = -ENOMEM;
 		goto error2;
 	}
 
@@ -213,12 +216,35 @@
 		goto error3;
 	}
 
-	/* TODO: register cmt-speech hsi client */
+	cmtspeech.name = "cmt-speech";
+	cmtspeech.tx_cfg = cl->tx_cfg;
+	cmtspeech.rx_cfg = cl->rx_cfg;
+	cmtspeech.platform_data = NULL;
+	cmtspeech.archdata = NULL;
+
+	modem->cmt_speech = hsi_new_client(port, &cmtspeech);
+	if (!modem->cmt_speech) {
+		dev_err(dev, "Could not register cmt-speech device\n");
+		err = -ENOMEM;
+		goto error3;
+	}
+
+	err = device_attach(&modem->cmt_speech->device);
+	if (err == 0) {
+		dev_err(dev, "Missing cmt-speech driver\n");
+		err = -EPROBE_DEFER;
+		goto error4;
+	} else if (err < 0) {
+		dev_err(dev, "Could not load cmt-speech driver (%d)\n", err);
+		goto error4;
+	}
 
 	dev_info(dev, "Registered Nokia HSI modem\n");
 
 	return 0;
 
+error4:
+	hsi_remove_client(&modem->cmt_speech->device, NULL);
 error3:
 	hsi_remove_client(&modem->ssi_protocol->device, NULL);
 error2:
@@ -237,6 +263,11 @@
 	if (!modem)
 		return 0;
 
+	if (modem->cmt_speech) {
+		hsi_remove_client(&modem->cmt_speech->device, NULL);
+		modem->cmt_speech = NULL;
+	}
+
 	if (modem->ssi_protocol) {
 		hsi_remove_client(&modem->ssi_protocol->device, NULL);
 		modem->ssi_protocol = NULL;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 110fade..25d9e72 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -510,6 +510,7 @@
 config SENSORS_GPIO_FAN
 	tristate "GPIO fan"
 	depends on GPIOLIB
+	depends on THERMAL || THERMAL=n
 	help
 	  If you say yes here you get support for fans connected to GPIO lines.
 
@@ -599,8 +600,8 @@
 	help
 	  If you say yes here you get support for ITE IT8705F, IT8712F,
 	  IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E,
-	  IT8771E, IT8772E, IT8782F, IT8783E/F and IT8603E sensor chips,
-	  and the SiS950 clone.
+	  IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E,
+	  IT8603E, IT8620E, and IT8623E sensor chips, and the SiS950 clone.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called it87.
@@ -624,7 +625,7 @@
 	  mobile devices and servers.  Support will include, but not be limited
 	  to, ADT7408, AT30TS00, CAT34TS02, CAT6095, MAX6604, MCP9804, MCP9805,
 	  MCP98242, MCP98243, MCP98244, MCP9843, SE97, SE98, STTS424(E),
-	  STTS2002, STTS3000, TSE2002B3, TSE2002GB2, TS3000B3, and TS3000GB2.
+	  STTS2002, STTS3000, TSE2002, TSE2004, TS3000, and TS3001.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called jc42.
@@ -1145,6 +1146,16 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called nct7802.
 
+config SENSORS_NCT7904
+	tristate "Nuvoton NCT7904"
+	depends on I2C
+	help
+	  If you say yes here you get support for the Nuvoton NCT7904
+	  hardware monitoring chip, including manual fan speed control.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called nct7904.
+
 config SENSORS_PCF8591
 	tristate "Philips PCF8591 ADC/DAC"
 	depends on I2C
@@ -1164,6 +1175,7 @@
 config SENSORS_PWM_FAN
 	tristate "PWM fan"
 	depends on (PWM && OF) || COMPILE_TEST
+	depends on THERMAL || THERMAL=n
 	help
 	  If you say yes here you get support for fans connected to PWM lines.
 	  The driver uses the generic PWM interface, thus it will work on a
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 6c94147..b4a40f1 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -120,6 +120,7 @@
 obj-$(CONFIG_SENSORS_NCT6683)	+= nct6683.o
 obj-$(CONFIG_SENSORS_NCT6775)	+= nct6775.o
 obj-$(CONFIG_SENSORS_NCT7802)	+= nct7802.o
+obj-$(CONFIG_SENSORS_NCT7904)	+= nct7904.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR)	+= ntc_thermistor.o
 obj-$(CONFIG_SENSORS_PC87360)	+= pc87360.o
 obj-$(CONFIG_SENSORS_PC87427)	+= pc87427.o
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 5b7fec8..ed303ba 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -397,14 +397,13 @@
 			struct device_attribute *devattr, char *buf) = {
 			show_label, show_crit_alarm, show_temp, show_tjmax,
 			show_ttarget };
-	static const char *const names[TOTAL_ATTRS] = {
-					"temp%d_label", "temp%d_crit_alarm",
-					"temp%d_input", "temp%d_crit",
-					"temp%d_max" };
+	static const char *const suffixes[TOTAL_ATTRS] = {
+		"label", "crit_alarm", "input", "crit", "max"
+	};
 
 	for (i = 0; i < tdata->attr_size; i++) {
-		snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i],
-			attr_no);
+		snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
+			 "temp%d_%s", attr_no, suffixes[i]);
 		sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
 		tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
 		tdata->sd_attrs[i].dev_attr.attr.mode = S_IRUGO;
diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 36abf81..a3dae6d 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c
@@ -34,10 +34,13 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/thermal.h>
 
 struct gpio_fan_data {
 	struct platform_device	*pdev;
 	struct device		*hwmon_dev;
+	/* Cooling device if any */
+	struct thermal_cooling_device *cdev;
 	struct mutex		lock; /* lock GPIOs operations. */
 	int			num_ctrl;
 	unsigned		*ctrl;
@@ -387,6 +390,53 @@
 	return 0;
 }
 
+static int gpio_fan_get_max_state(struct thermal_cooling_device *cdev,
+				  unsigned long *state)
+{
+	struct gpio_fan_data *fan_data = cdev->devdata;
+
+	if (!fan_data)
+		return -EINVAL;
+
+	*state = fan_data->num_speed - 1;
+	return 0;
+}
+
+static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev,
+				  unsigned long *state)
+{
+	struct gpio_fan_data *fan_data = cdev->devdata;
+	int r;
+
+	if (!fan_data)
+		return -EINVAL;
+
+	r = get_fan_speed_index(fan_data);
+	if (r < 0)
+		return r;
+
+	*state = r;
+	return 0;
+}
+
+static int gpio_fan_set_cur_state(struct thermal_cooling_device *cdev,
+				  unsigned long state)
+{
+	struct gpio_fan_data *fan_data = cdev->devdata;
+
+	if (!fan_data)
+		return -EINVAL;
+
+	set_fan_speed(fan_data, state);
+	return 0;
+}
+
+static const struct thermal_cooling_device_ops gpio_fan_cool_ops = {
+	.get_max_state = gpio_fan_get_max_state,
+	.get_cur_state = gpio_fan_get_cur_state,
+	.set_cur_state = gpio_fan_set_cur_state,
+};
+
 #ifdef CONFIG_OF_GPIO
 /*
  * Translate OpenFirmware node properties into platform_data
@@ -404,10 +454,32 @@
 
 	node = dev->of_node;
 
+	/* Alarm GPIO if one exists */
+	if (of_gpio_named_count(node, "alarm-gpios") > 0) {
+		struct gpio_fan_alarm *alarm;
+		int val;
+		enum of_gpio_flags flags;
+
+		alarm = devm_kzalloc(dev, sizeof(struct gpio_fan_alarm),
+					GFP_KERNEL);
+		if (!alarm)
+			return -ENOMEM;
+
+		val = of_get_named_gpio_flags(node, "alarm-gpios", 0, &flags);
+		if (val < 0)
+			return val;
+		alarm->gpio = val;
+		alarm->active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+		pdata->alarm = alarm;
+	}
+
 	/* Fill GPIO pin array */
 	pdata->num_ctrl = of_gpio_count(node);
 	if (pdata->num_ctrl <= 0) {
-		dev_err(dev, "gpios DT property empty / missing");
+		if (pdata->alarm)
+			return 0;
+		dev_err(dev, "DT properties empty / missing");
 		return -ENODEV;
 	}
 	ctrl = devm_kzalloc(dev, pdata->num_ctrl * sizeof(unsigned),
@@ -460,26 +532,6 @@
 	}
 	pdata->speed = speed;
 
-	/* Alarm GPIO if one exists */
-	if (of_gpio_named_count(node, "alarm-gpios") > 0) {
-		struct gpio_fan_alarm *alarm;
-		int val;
-		enum of_gpio_flags flags;
-
-		alarm = devm_kzalloc(dev, sizeof(struct gpio_fan_alarm),
-					GFP_KERNEL);
-		if (!alarm)
-			return -ENOMEM;
-
-		val = of_get_named_gpio_flags(node, "alarm-gpios", 0, &flags);
-		if (val < 0)
-			return val;
-		alarm->gpio = val;
-		alarm->active_low = flags & OF_GPIO_ACTIVE_LOW;
-
-		pdata->alarm = alarm;
-	}
-
 	return 0;
 }
 
@@ -495,6 +547,11 @@
 	struct gpio_fan_data *fan_data;
 	struct gpio_fan_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
+	fan_data = devm_kzalloc(&pdev->dev, sizeof(struct gpio_fan_data),
+				GFP_KERNEL);
+	if (!fan_data)
+		return -ENOMEM;
+
 #ifdef CONFIG_OF_GPIO
 	if (!pdata) {
 		pdata = devm_kzalloc(&pdev->dev,
@@ -512,11 +569,6 @@
 		return -EINVAL;
 #endif /* CONFIG_OF_GPIO */
 
-	fan_data = devm_kzalloc(&pdev->dev, sizeof(struct gpio_fan_data),
-				GFP_KERNEL);
-	if (!fan_data)
-		return -ENOMEM;
-
 	fan_data->pdev = pdev;
 	platform_set_drvdata(pdev, fan_data);
 	mutex_init(&fan_data->lock);
@@ -544,18 +596,39 @@
 						       gpio_fan_groups);
 	if (IS_ERR(fan_data->hwmon_dev))
 		return PTR_ERR(fan_data->hwmon_dev);
+#ifdef CONFIG_OF_GPIO
+	/* Optional cooling device register for Device tree platforms */
+	fan_data->cdev = thermal_of_cooling_device_register(pdev->dev.of_node,
+							    "gpio-fan",
+							    fan_data,
+							    &gpio_fan_cool_ops);
+#else /* CONFIG_OF_GPIO */
+	/* Optional cooling device register for non Device tree platforms */
+	fan_data->cdev = thermal_cooling_device_register("gpio-fan", fan_data,
+							 &gpio_fan_cool_ops);
+#endif /* CONFIG_OF_GPIO */
 
 	dev_info(&pdev->dev, "GPIO fan initialized\n");
 
 	return 0;
 }
 
-static void gpio_fan_shutdown(struct platform_device *pdev)
+static int gpio_fan_remove(struct platform_device *pdev)
 {
-	struct gpio_fan_data *fan_data = dev_get_drvdata(&pdev->dev);
+	struct gpio_fan_data *fan_data = platform_get_drvdata(pdev);
+
+	if (!IS_ERR(fan_data->cdev))
+		thermal_cooling_device_unregister(fan_data->cdev);
 
 	if (fan_data->ctrl)
 		set_fan_speed(fan_data, 0);
+
+	return 0;
+}
+
+static void gpio_fan_shutdown(struct platform_device *pdev)
+{
+	gpio_fan_remove(pdev);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -589,6 +662,7 @@
 
 static struct platform_driver gpio_fan_driver = {
 	.probe		= gpio_fan_probe,
+	.remove		= gpio_fan_remove,
 	.shutdown	= gpio_fan_shutdown,
 	.driver	= {
 		.name	= "gpio-fan",
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index 030e7ff..21b9c72 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -56,15 +56,10 @@
 static u8 const watt_sensor_sig[]  = {0x41, 0x43};
 
 #define PEX_NUM_SENSOR_FUNCS	3
-static char const * const power_sensor_name_templates[] = {
-	"%s%d_average",
-	"%s%d_average_lowest",
-	"%s%d_average_highest"
-};
-static char const * const temp_sensor_name_templates[] = {
-	"%s%d_input",
-	"%s%d_input_lowest",
-	"%s%d_input_highest"
+static const char * const sensor_name_suffixes[] = {
+	"",
+	"_lowest",
+	"_highest"
 };
 
 static void ibmpex_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data);
@@ -355,9 +350,11 @@
 		return -ENOMEM;
 
 	if (type == TEMP_SENSOR)
-		sprintf(n, temp_sensor_name_templates[func], "temp", counter);
+		sprintf(n, "temp%d_input%s",
+			counter, sensor_name_suffixes[func]);
 	else if (type == POWER_SENSOR)
-		sprintf(n, power_sensor_name_templates[func], "power", counter);
+		sprintf(n, "power%d_average%s",
+			counter, sensor_name_suffixes[func]);
 
 	sysfs_attr_init(&data->sensors[sensor].attr[func].dev_attr.attr);
 	data->sensors[sensor].attr[func].dev_attr.attr.name = n;
diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index febe817..4255514 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -30,8 +30,11 @@
 #include <linux/platform_device.h>
 #include <asm/opal.h>
 #include <linux/err.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
 
 #define MAX_ATTR_LEN	32
+#define MAX_LABEL_LEN	64
 
 /* Sensor suffix name from DT */
 #define DT_FAULT_ATTR_SUFFIX		"faulted"
@@ -44,17 +47,20 @@
  */
 enum sensors {
 	FAN,
-	AMBIENT_TEMP,
+	TEMP,
 	POWER_SUPPLY,
 	POWER_INPUT,
 	MAX_SENSOR_TYPE,
 };
 
+#define INVALID_INDEX (-1U)
+
 static struct sensor_group {
 	const char *name;
 	const char *compatible;
 	struct attribute_group group;
 	u32 attr_count;
+	u32 hwmon_index;
 } sensor_groups[] = {
 	{"fan", "ibm,opal-sensor-cooling-fan"},
 	{"temp", "ibm,opal-sensor-amb-temp"},
@@ -64,7 +70,10 @@
 
 struct sensor_data {
 	u32 id; /* An opaque id of the firmware for each sensor */
+	u32 hwmon_index;
+	u32 opal_index;
 	enum sensors type;
+	char label[MAX_LABEL_LEN];
 	char name[MAX_ATTR_LEN];
 	struct device_attribute dev_attr;
 };
@@ -87,7 +96,7 @@
 		return ret;
 
 	/* Convert temperature to milli-degrees */
-	if (sdata->type == AMBIENT_TEMP)
+	if (sdata->type == TEMP)
 		x *= 1000;
 	/* Convert power to micro-watts */
 	else if (sdata->type == POWER_INPUT)
@@ -96,8 +105,65 @@
 	return sprintf(buf, "%u\n", x);
 }
 
-static int get_sensor_index_attr(const char *name, u32 *index,
-					char *attr)
+static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
+			  char *buf)
+{
+	struct sensor_data *sdata = container_of(devattr, struct sensor_data,
+						 dev_attr);
+
+	return sprintf(buf, "%s\n", sdata->label);
+}
+
+static int __init get_logical_cpu(int hwcpu)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		if (get_hard_smp_processor_id(cpu) == hwcpu)
+			return cpu;
+
+	return -ENOENT;
+}
+
+static void __init make_sensor_label(struct device_node *np,
+				     struct sensor_data *sdata,
+				     const char *label)
+{
+	u32 id;
+	size_t n;
+
+	n = snprintf(sdata->label, sizeof(sdata->label), "%s", label);
+
+	/*
+	 * Core temp pretty print
+	 */
+	if (!of_property_read_u32(np, "ibm,pir", &id)) {
+		int cpuid = get_logical_cpu(id);
+
+		if (cpuid >= 0)
+			/*
+			 * The digital thermal sensors are associated
+			 * with a core. Let's print out the range of
+			 * cpu ids corresponding to the hardware
+			 * threads of the core.
+			 */
+			n += snprintf(sdata->label + n,
+				      sizeof(sdata->label) - n, " %d-%d",
+				      cpuid, cpuid + threads_per_core - 1);
+		else
+			n += snprintf(sdata->label + n,
+				      sizeof(sdata->label) - n, " phy%d", id);
+	}
+
+	/*
+	 * Membuffer pretty print
+	 */
+	if (!of_property_read_u32(np, "ibm,chip-id", &id))
+		n += snprintf(sdata->label + n, sizeof(sdata->label) - n,
+			      " %d", id & 0xffff);
+}
+
+static int get_sensor_index_attr(const char *name, u32 *index, char *attr)
 {
 	char *hash_pos = strchr(name, '#');
 	char buf[8] = { 0 };
@@ -127,46 +193,90 @@
 	return 0;
 }
 
+static const char *convert_opal_attr_name(enum sensors type,
+					  const char *opal_attr)
+{
+	const char *attr_name = NULL;
+
+	if (!strcmp(opal_attr, DT_FAULT_ATTR_SUFFIX)) {
+		attr_name = "fault";
+	} else if (!strcmp(opal_attr, DT_DATA_ATTR_SUFFIX)) {
+		attr_name = "input";
+	} else if (!strcmp(opal_attr, DT_THRESHOLD_ATTR_SUFFIX)) {
+		if (type == TEMP)
+			attr_name = "max";
+		else if (type == FAN)
+			attr_name = "min";
+	}
+
+	return attr_name;
+}
+
 /*
  * This function translates the DT node name into the 'hwmon' attribute name.
  * IBMPOWERNV device node appear like cooling-fan#2-data, amb-temp#1-thrs etc.
  * which need to be mapped as fan2_input, temp1_max respectively before
  * populating them inside hwmon device class.
  */
-static int create_hwmon_attr_name(struct device *dev, enum sensors type,
-					 const char *node_name,
-					 char *hwmon_attr_name)
+static const char *parse_opal_node_name(const char *node_name,
+					enum sensors type, u32 *index)
 {
 	char attr_suffix[MAX_ATTR_LEN];
-	char *attr_name;
-	u32 index;
+	const char *attr_name;
 	int err;
 
-	err = get_sensor_index_attr(node_name, &index, attr_suffix);
-	if (err) {
-		dev_err(dev, "Sensor device node name '%s' is invalid\n",
-			node_name);
-		return err;
+	err = get_sensor_index_attr(node_name, index, attr_suffix);
+	if (err)
+		return ERR_PTR(err);
+
+	attr_name = convert_opal_attr_name(type, attr_suffix);
+	if (!attr_name)
+		return ERR_PTR(-ENOENT);
+
+	return attr_name;
+}
+
+static int get_sensor_type(struct device_node *np)
+{
+	enum sensors type;
+	const char *str;
+
+	for (type = 0; type < MAX_SENSOR_TYPE; type++) {
+		if (of_device_is_compatible(np, sensor_groups[type].compatible))
+			return type;
 	}
 
-	if (!strcmp(attr_suffix, DT_FAULT_ATTR_SUFFIX)) {
-		attr_name = "fault";
-	} else if (!strcmp(attr_suffix, DT_DATA_ATTR_SUFFIX)) {
-		attr_name = "input";
-	} else if (!strcmp(attr_suffix, DT_THRESHOLD_ATTR_SUFFIX)) {
-		if (type == AMBIENT_TEMP)
-			attr_name = "max";
-		else if (type == FAN)
-			attr_name = "min";
-		else
-			return -ENOENT;
-	} else {
-		return -ENOENT;
-	}
+	/*
+	 * Let's check if we have a newer device tree
+	 */
+	if (!of_device_is_compatible(np, "ibm,opal-sensor"))
+		return MAX_SENSOR_TYPE;
 
-	snprintf(hwmon_attr_name, MAX_ATTR_LEN, "%s%d_%s",
-		 sensor_groups[type].name, index, attr_name);
-	return 0;
+	if (of_property_read_string(np, "sensor-type", &str))
+		return MAX_SENSOR_TYPE;
+
+	for (type = 0; type < MAX_SENSOR_TYPE; type++)
+		if (!strcmp(str, sensor_groups[type].name))
+			return type;
+
+	return MAX_SENSOR_TYPE;
+}
+
+static u32 get_sensor_hwmon_index(struct sensor_data *sdata,
+				  struct sensor_data *sdata_table, int count)
+{
+	int i;
+
+	/*
+	 * We don't use the OPAL index on newer device trees
+	 */
+	if (sdata->opal_index != INVALID_INDEX) {
+		for (i = 0; i < count; i++)
+			if (sdata_table[i].opal_index == sdata->opal_index &&
+			    sdata_table[i].type == sdata->type)
+				return sdata_table[i].hwmon_index;
+	}
+	return ++sensor_groups[sdata->type].hwmon_index;
 }
 
 static int populate_attr_groups(struct platform_device *pdev)
@@ -178,15 +288,22 @@
 
 	opal = of_find_node_by_path("/ibm,opal/sensors");
 	for_each_child_of_node(opal, np) {
+		const char *label;
+
 		if (np->name == NULL)
 			continue;
 
-		for (type = 0; type < MAX_SENSOR_TYPE; type++)
-			if (of_device_is_compatible(np,
-					sensor_groups[type].compatible)) {
-				sensor_groups[type].attr_count++;
-				break;
-			}
+		type = get_sensor_type(np);
+		if (type == MAX_SENSOR_TYPE)
+			continue;
+
+		sensor_groups[type].attr_count++;
+
+		/*
+		 * add a new attribute for labels
+		 */
+		if (!of_property_read_string(np, "label", &label))
+			sensor_groups[type].attr_count++;
 	}
 
 	of_node_put(opal);
@@ -207,6 +324,21 @@
 	return 0;
 }
 
+static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name,
+			      ssize_t (*show)(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf))
+{
+	snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s",
+		 sensor_groups[sdata->type].name, sdata->hwmon_index,
+		 attr_name);
+
+	sysfs_attr_init(&sdata->dev_attr.attr);
+	sdata->dev_attr.attr.name = sdata->name;
+	sdata->dev_attr.attr.mode = S_IRUGO;
+	sdata->dev_attr.show = show;
+}
+
 /*
  * Iterate through the device tree for each child of 'sensors' node, create
  * a sysfs attribute file, the file is named by translating the DT node name
@@ -233,18 +365,23 @@
 	}
 
 	for_each_child_of_node(opal, np) {
+		const char *attr_name;
+		u32 opal_index;
+		const char *label;
+
 		if (np->name == NULL)
 			continue;
 
-		for (type = 0; type < MAX_SENSOR_TYPE; type++)
-			if (of_device_is_compatible(np,
-					sensor_groups[type].compatible))
-				break;
-
+		type = get_sensor_type(np);
 		if (type == MAX_SENSOR_TYPE)
 			continue;
 
-		if (of_property_read_u32(np, "sensor-id", &sensor_id)) {
+		/*
+		 * Newer device trees use a "sensor-data" property
+		 * name for input.
+		 */
+		if (of_property_read_u32(np, "sensor-id", &sensor_id) &&
+		    of_property_read_u32(np, "sensor-data", &sensor_id)) {
 			dev_info(&pdev->dev,
 				 "'sensor-id' missing in the node '%s'\n",
 				 np->name);
@@ -253,18 +390,46 @@
 
 		sdata[count].id = sensor_id;
 		sdata[count].type = type;
-		err = create_hwmon_attr_name(&pdev->dev, type, np->name,
-					     sdata[count].name);
-		if (err)
-			goto exit_put_node;
 
-		sysfs_attr_init(&sdata[count].dev_attr.attr);
-		sdata[count].dev_attr.attr.name = sdata[count].name;
-		sdata[count].dev_attr.attr.mode = S_IRUGO;
-		sdata[count].dev_attr.show = show_sensor;
+		/*
+		 * If we can not parse the node name, it means we are
+		 * running on a newer device tree. We can just forget
+		 * about the OPAL index and use a defaut value for the
+		 * hwmon attribute name
+		 */
+		attr_name = parse_opal_node_name(np->name, type, &opal_index);
+		if (IS_ERR(attr_name)) {
+			attr_name = "input";
+			opal_index = INVALID_INDEX;
+		}
+
+		sdata[count].opal_index = opal_index;
+		sdata[count].hwmon_index =
+			get_sensor_hwmon_index(&sdata[count], sdata, count);
+
+		create_hwmon_attr(&sdata[count], attr_name, show_sensor);
 
 		pgroups[type]->attrs[sensor_groups[type].attr_count++] =
 				&sdata[count++].dev_attr.attr;
+
+		if (!of_property_read_string(np, "label", &label)) {
+			/*
+			 * For the label attribute, we can reuse the
+			 * "properties" of the previous "input"
+			 * attribute. They are related to the same
+			 * sensor.
+			 */
+			sdata[count].type = type;
+			sdata[count].opal_index = sdata[count - 1].opal_index;
+			sdata[count].hwmon_index = sdata[count - 1].hwmon_index;
+
+			make_sensor_label(np, &sdata[count], label);
+
+			create_hwmon_attr(&sdata[count], "label", show_label);
+
+			pgroups[type]->attrs[sensor_groups[type].attr_count++] =
+				&sdata[count++].dev_attr.attr;
+		}
 	}
 
 exit_put_node:
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 409116c..d0ee556 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -11,6 +11,7 @@
  *  similar parts.  The other devices are supported by different drivers.
  *
  *  Supports: IT8603E  Super I/O chip w/LPC interface
+ *            IT8620E  Super I/O chip w/LPC interface
  *            IT8623E  Super I/O chip w/LPC interface
  *            IT8705F  Super I/O chip w/LPC interface
  *            IT8712F  Super I/O chip w/LPC interface
@@ -23,8 +24,11 @@
  *            IT8758E  Super I/O chip w/LPC interface
  *            IT8771E  Super I/O chip w/LPC interface
  *            IT8772E  Super I/O chip w/LPC interface
+ *            IT8781F  Super I/O chip w/LPC interface
  *            IT8782F  Super I/O chip w/LPC interface
  *            IT8783E/F Super I/O chip w/LPC interface
+ *            IT8786E  Super I/O chip w/LPC interface
+ *            IT8790E  Super I/O chip w/LPC interface
  *            Sis950   A clone of the IT8705F
  *
  *  Copyright (C) 2001 Chris Gauthron
@@ -66,7 +70,7 @@
 #define DRVNAME "it87"
 
 enum chips { it87, it8712, it8716, it8718, it8720, it8721, it8728, it8771,
-	     it8772, it8782, it8783, it8603 };
+	     it8772, it8781, it8782, it8783, it8786, it8790, it8603, it8620 };
 
 static unsigned short force_id;
 module_param(force_id, ushort, 0);
@@ -146,15 +150,20 @@
 #define IT8728F_DEVID 0x8728
 #define IT8771E_DEVID 0x8771
 #define IT8772E_DEVID 0x8772
+#define IT8781F_DEVID 0x8781
 #define IT8782F_DEVID 0x8782
 #define IT8783E_DEVID 0x8783
+#define IT8786E_DEVID 0x8786
+#define IT8790E_DEVID 0x8790
 #define IT8603E_DEVID 0x8603
+#define IT8620E_DEVID 0x8620
 #define IT8623E_DEVID 0x8623
 #define IT87_ACT_REG  0x30
 #define IT87_BASE_REG 0x60
 
 /* Logical device 7 registers (IT8712F and later) */
 #define IT87_SIO_GPIO1_REG	0x25
+#define IT87_SIO_GPIO2_REG	0x26
 #define IT87_SIO_GPIO3_REG	0x27
 #define IT87_SIO_GPIO5_REG	0x29
 #define IT87_SIO_PINX1_REG	0x2a	/* Pin selection */
@@ -207,11 +216,11 @@
 
 /* Monitors: 9 voltage (0 to 7, battery), 3 temp (1 to 3), 3 fan (1 to 3) */
 
-static const u8 IT87_REG_FAN[]		= { 0x0d, 0x0e, 0x0f, 0x80, 0x82 };
-static const u8 IT87_REG_FAN_MIN[]	= { 0x10, 0x11, 0x12, 0x84, 0x86 };
-static const u8 IT87_REG_FANX[]		= { 0x18, 0x19, 0x1a, 0x81, 0x83 };
-static const u8 IT87_REG_FANX_MIN[]	= { 0x1b, 0x1c, 0x1d, 0x85, 0x87 };
-static const u8 IT87_REG_TEMP_OFFSET[]	= { 0x56, 0x57, 0x59 };
+static const u8 IT87_REG_FAN[]         = { 0x0d, 0x0e, 0x0f, 0x80, 0x82, 0x4c };
+static const u8 IT87_REG_FAN_MIN[]     = { 0x10, 0x11, 0x12, 0x84, 0x86, 0x4e };
+static const u8 IT87_REG_FANX[]        = { 0x18, 0x19, 0x1a, 0x81, 0x83, 0x4d };
+static const u8 IT87_REG_FANX_MIN[]    = { 0x1b, 0x1c, 0x1d, 0x85, 0x87, 0x4f };
+static const u8 IT87_REG_TEMP_OFFSET[] = { 0x56, 0x57, 0x59 };
 
 #define IT87_REG_FAN_MAIN_CTRL 0x13
 #define IT87_REG_FAN_CTL       0x14
@@ -238,6 +247,7 @@
 
 struct it87_devices {
 	const char *name;
+	const char * const suffix;
 	u16 features;
 	u8 peci_mask;
 	u8 old_peci_mask;
@@ -250,79 +260,131 @@
 #define FEAT_TEMP_OFFSET	(1 << 4)
 #define FEAT_TEMP_PECI		(1 << 5)
 #define FEAT_TEMP_OLD_PECI	(1 << 6)
+#define FEAT_FAN16_CONFIG	(1 << 7)	/* Need to enable 16-bit fans */
+#define FEAT_FIVE_FANS		(1 << 8)	/* Supports five fans */
+#define FEAT_VID		(1 << 9)	/* Set if chip supports VID */
+#define FEAT_IN7_INTERNAL	(1 << 10)	/* Set if in7 is internal */
+#define FEAT_SIX_FANS		(1 << 11)	/* Supports six fans */
 
 static const struct it87_devices it87_devices[] = {
 	[it87] = {
 		.name = "it87",
+		.suffix = "F",
 		.features = FEAT_OLD_AUTOPWM,	/* may need to overwrite */
 	},
 	[it8712] = {
 		.name = "it8712",
-		.features = FEAT_OLD_AUTOPWM,	/* may need to overwrite */
+		.suffix = "F",
+		.features = FEAT_OLD_AUTOPWM | FEAT_VID,
+						/* may need to overwrite */
 	},
 	[it8716] = {
 		.name = "it8716",
-		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET,
+		.suffix = "F",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
 	},
 	[it8718] = {
 		.name = "it8718",
-		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI,
+		.suffix = "F",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
 		.old_peci_mask = 0x4,
 	},
 	[it8720] = {
 		.name = "it8720",
-		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI,
+		.suffix = "F",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
 		.old_peci_mask = 0x4,
 	},
 	[it8721] = {
 		.name = "it8721",
+		.suffix = "F",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI
+		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL,
 		.peci_mask = 0x05,
 		.old_peci_mask = 0x02,	/* Actually reports PCH */
 	},
 	[it8728] = {
 		.name = "it8728",
+		.suffix = "F",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_FIVE_FANS
+		  | FEAT_IN7_INTERNAL,
 		.peci_mask = 0x07,
 	},
 	[it8771] = {
 		.name = "it8771",
+		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
-					/* PECI: guesswork */
-					/* 12mV ADC (OHM) */
-					/* 16 bit fans (OHM) */
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+				/* PECI: guesswork */
+				/* 12mV ADC (OHM) */
+				/* 16 bit fans (OHM) */
+				/* three fans, always 16 bit (guesswork) */
 		.peci_mask = 0x07,
 	},
 	[it8772] = {
 		.name = "it8772",
+		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
-					/* PECI (coreboot) */
-					/* 12mV ADC (HWSensors4, OHM) */
-					/* 16 bit fans (HWSensors4, OHM) */
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+				/* PECI (coreboot) */
+				/* 12mV ADC (HWSensors4, OHM) */
+				/* 16 bit fans (HWSensors4, OHM) */
+				/* three fans, always 16 bit (datasheet) */
 		.peci_mask = 0x07,
 	},
+	[it8781] = {
+		.name = "it8781",
+		.suffix = "F",
+		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+		.old_peci_mask = 0x4,
+	},
 	[it8782] = {
 		.name = "it8782",
+		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
 		.old_peci_mask = 0x4,
 	},
 	[it8783] = {
 		.name = "it8783",
+		.suffix = "E/F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
 		.old_peci_mask = 0x4,
 	},
+	[it8786] = {
+		.name = "it8786",
+		.suffix = "E",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		.peci_mask = 0x07,
+	},
+	[it8790] = {
+		.name = "it8790",
+		.suffix = "E",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		.peci_mask = 0x07,
+	},
 	[it8603] = {
 		.name = "it8603",
+		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		.peci_mask = 0x07,
+	},
+	[it8620] = {
+		.name = "it8620",
+		.suffix = "E",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
+		  | FEAT_IN7_INTERNAL,
 		.peci_mask = 0x07,
 	},
 };
@@ -337,6 +399,12 @@
 #define has_temp_old_peci(data, nr) \
 				(((data)->features & FEAT_TEMP_OLD_PECI) && \
 				 ((data)->old_peci_mask & (1 << nr)))
+#define has_fan16_config(data)	((data)->features & FEAT_FAN16_CONFIG)
+#define has_five_fans(data)	((data)->features & (FEAT_FIVE_FANS | \
+						     FEAT_SIX_FANS))
+#define has_vid(data)		((data)->features & FEAT_VID)
+#define has_in7_internal(data)	((data)->features & FEAT_IN7_INTERNAL)
+#define has_six_fans(data)	((data)->features & FEAT_SIX_FANS)
 
 struct it87_sio_data {
 	enum chips type;
@@ -373,7 +441,7 @@
 	u16 in_scaled;		/* Internal voltage sensors are scaled */
 	u8 in[10][3];		/* [nr][0]=in, [1]=min, [2]=max */
 	u8 has_fan;		/* Bitfield, fans enabled */
-	u16 fan[5][2];		/* Register values, [nr][0]=fan, [1]=min */
+	u16 fan[6][2];		/* Register values, [nr][0]=fan, [1]=min */
 	u8 has_temp;		/* Bitfield, temp sensors enabled */
 	s8 temp[3][4];		/* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
 	u8 sensor;		/* Register value (IT87_REG_TEMP_ENABLE) */
@@ -475,15 +543,25 @@
 }
 #define DIV_FROM_REG(val) (1 << (val))
 
+/*
+ * PWM base frequencies. The frequency has to be divided by either 128 or 256,
+ * depending on the chip type, to calculate the actual PWM frequency.
+ *
+ * Some of the chip datasheets suggest a base frequency of 51 kHz instead
+ * of 750 kHz for the slowest base frequency, resulting in a PWM frequency
+ * of 200 Hz. Sometimes both PWM frequency select registers are affected,
+ * sometimes just one. It is unknown if this is a datasheet error or real,
+ * so this is ignored for now.
+ */
 static const unsigned int pwm_freq[8] = {
-	48000000 / 128,
-	24000000 / 128,
-	12000000 / 128,
-	8000000 / 128,
-	6000000 / 128,
-	3000000 / 128,
-	1500000 / 128,
-	750000 / 128,
+	48000000,
+	24000000,
+	12000000,
+	8000000,
+	6000000,
+	3000000,
+	1500000,
+	750000,
 };
 
 static int it87_probe(struct platform_device *pdev);
@@ -801,8 +879,11 @@
 {
 	struct it87_data *data = it87_update_device(dev);
 	int index = (data->fan_ctl >> 4) & 0x07;
+	unsigned int freq;
 
-	return sprintf(buf, "%u\n", pwm_freq[index]);
+	freq = pwm_freq[index] / (has_newer_autopwm(data) ? 256 : 128);
+
+	return sprintf(buf, "%u\n", freq);
 }
 
 static ssize_t set_fan(struct device *dev, struct device_attribute *attr,
@@ -1024,6 +1105,9 @@
 	if (kstrtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
+	val = clamp_val(val, 0, 1000000);
+	val *= has_newer_autopwm(data) ? 256 : 128;
+
 	/* Search for the nearest available frequency */
 	for (i = 0; i < 7; i++) {
 		if (val > (pwm_freq[i] + pwm_freq[i+1]) / 2)
@@ -1196,6 +1280,10 @@
 static SENSOR_DEVICE_ATTR_2(fan5_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
 			    4, 1);
 
+static SENSOR_DEVICE_ATTR_2(fan6_input, S_IRUGO, show_fan, NULL, 5, 0);
+static SENSOR_DEVICE_ATTR_2(fan6_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+			    5, 1);
+
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
 			  show_pwm_enable, set_pwm_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0);
@@ -1326,6 +1414,7 @@
 static SENSOR_DEVICE_ATTR(fan3_alarm, S_IRUGO, show_alarm, NULL, 2);
 static SENSOR_DEVICE_ATTR(fan4_alarm, S_IRUGO, show_alarm, NULL, 3);
 static SENSOR_DEVICE_ATTR(fan5_alarm, S_IRUGO, show_alarm, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan6_alarm, S_IRUGO, show_alarm, NULL, 7);
 static SENSOR_DEVICE_ATTR(temp1_alarm, S_IRUGO, show_alarm, NULL, 16);
 static SENSOR_DEVICE_ATTR(temp2_alarm, S_IRUGO, show_alarm, NULL, 17);
 static SENSOR_DEVICE_ATTR(temp3_alarm, S_IRUGO, show_alarm, NULL, 18);
@@ -1376,6 +1465,7 @@
 static SENSOR_DEVICE_ATTR(fan3_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(fan4_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(fan5_beep, S_IRUGO, show_beep, set_beep, 0);
+static SENSOR_DEVICE_ATTR(fan6_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(temp1_beep, S_IRUGO | S_IWUSR,
 			  show_beep, set_beep, 2);
 static SENSOR_DEVICE_ATTR(temp2_beep, S_IRUGO, show_beep, NULL, 2);
@@ -1579,7 +1669,7 @@
 	&sensor_dev_attr_temp3_beep.dev_attr.attr,
 };
 
-static struct attribute *it87_attributes_fan[5][3+1] = { {
+static struct attribute *it87_attributes_fan[6][3+1] = { {
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_fan1_min.dev_attr.attr,
 	&sensor_dev_attr_fan1_alarm.dev_attr.attr,
@@ -1604,14 +1694,20 @@
 	&sensor_dev_attr_fan5_min.dev_attr.attr,
 	&sensor_dev_attr_fan5_alarm.dev_attr.attr,
 	NULL
+}, {
+	&sensor_dev_attr_fan6_input.dev_attr.attr,
+	&sensor_dev_attr_fan6_min.dev_attr.attr,
+	&sensor_dev_attr_fan6_alarm.dev_attr.attr,
+	NULL
 } };
 
-static const struct attribute_group it87_group_fan[5] = {
+static const struct attribute_group it87_group_fan[6] = {
 	{ .attrs = it87_attributes_fan[0] },
 	{ .attrs = it87_attributes_fan[1] },
 	{ .attrs = it87_attributes_fan[2] },
 	{ .attrs = it87_attributes_fan[3] },
 	{ .attrs = it87_attributes_fan[4] },
+	{ .attrs = it87_attributes_fan[5] },
 };
 
 static const struct attribute *it87_attributes_fan_div[] = {
@@ -1693,6 +1789,7 @@
 	&sensor_dev_attr_fan3_beep.dev_attr.attr,
 	&sensor_dev_attr_fan4_beep.dev_attr.attr,
 	&sensor_dev_attr_fan5_beep.dev_attr.attr,
+	&sensor_dev_attr_fan6_beep.dev_attr.attr,
 };
 
 static struct attribute *it87_attributes_vid[] = {
@@ -1724,6 +1821,7 @@
 	int err;
 	u16 chip_type;
 	const char *board_vendor, *board_name;
+	const struct it87_devices *config;
 
 	err = superio_enter();
 	if (err)
@@ -1761,16 +1859,28 @@
 	case IT8772E_DEVID:
 		sio_data->type = it8772;
 		break;
+	case IT8781F_DEVID:
+		sio_data->type = it8781;
+		break;
 	case IT8782F_DEVID:
 		sio_data->type = it8782;
 		break;
 	case IT8783E_DEVID:
 		sio_data->type = it8783;
 		break;
+	case IT8786E_DEVID:
+		sio_data->type = it8786;
+		break;
+	case IT8790E_DEVID:
+		sio_data->type = it8790;
+		break;
 	case IT8603E_DEVID:
 	case IT8623E_DEVID:
 		sio_data->type = it8603;
 		break;
+	case IT8620E_DEVID:
+		sio_data->type = it8620;
+		break;
 	case 0xffff:	/* No device at all */
 		goto exit;
 	default:
@@ -1792,30 +1902,34 @@
 
 	err = 0;
 	sio_data->revision = superio_inb(DEVREV) & 0x0f;
-	pr_info("Found IT%04x%c chip at 0x%x, revision %d\n", chip_type,
-		chip_type == 0x8771 || chip_type == 0x8772 ||
-		chip_type == 0x8603 ? 'E' : 'F', *address,
-		sio_data->revision);
+	pr_info("Found IT%04x%s chip at 0x%x, revision %d\n", chip_type,
+		it87_devices[sio_data->type].suffix,
+		*address, sio_data->revision);
+
+	config = &it87_devices[sio_data->type];
+
+	/* in7 (VSB or VCCH5V) is always internal on some chips */
+	if (has_in7_internal(config))
+		sio_data->internal |= (1 << 1);
 
 	/* in8 (Vbat) is always internal */
-	sio_data->internal = (1 << 2);
+	sio_data->internal |= (1 << 2);
+
 	/* Only the IT8603E has in9 */
 	if (sio_data->type != it8603)
 		sio_data->skip_in |= (1 << 9);
 
-	/* Read GPIO config and VID value from LDN 7 (GPIO) */
-	if (sio_data->type == it87) {
-		/* The IT8705F doesn't have VID pins at all */
+	if (!has_vid(config))
 		sio_data->skip_vid = 1;
 
+	/* Read GPIO config and VID value from LDN 7 (GPIO) */
+	if (sio_data->type == it87) {
 		/* The IT8705F has a different LD number for GPIO */
 		superio_select(5);
 		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	} else if (sio_data->type == it8783) {
 		int reg25, reg27, reg2a, reg2c, regef;
 
-		sio_data->skip_vid = 1;	/* No VID */
-
 		superio_select(GPIO);
 
 		reg25 = superio_inb(IT87_SIO_GPIO1_REG);
@@ -1881,7 +1995,6 @@
 	} else if (sio_data->type == it8603) {
 		int reg27, reg29;
 
-		sio_data->skip_vid = 1;	/* No VID */
 		superio_select(GPIO);
 
 		reg27 = superio_inb(IT87_SIO_GPIO3_REG);
@@ -1902,14 +2015,36 @@
 		sio_data->skip_in |= (1 << 5); /* No VIN5 */
 		sio_data->skip_in |= (1 << 6); /* No VIN6 */
 
-		/* no fan4 */
-		sio_data->skip_pwm |= (1 << 3);
-		sio_data->skip_fan |= (1 << 3);
-
-		sio_data->internal |= (1 << 1); /* in7 is VSB */
 		sio_data->internal |= (1 << 3); /* in9 is AVCC */
 
 		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+	} else if (sio_data->type == it8620) {
+		int reg;
+
+		superio_select(GPIO);
+
+		/* Check for fan4, fan5 */
+		reg = superio_inb(IT87_SIO_GPIO2_REG);
+		if (!(reg & (1 << 5)))
+			sio_data->skip_fan |= (1 << 3);
+		if (!(reg & (1 << 4)))
+			sio_data->skip_fan |= (1 << 4);
+
+		/* Check for pwm3, fan3 */
+		reg = superio_inb(IT87_SIO_GPIO3_REG);
+		if (reg & (1 << 6))
+			sio_data->skip_pwm |= (1 << 2);
+		if (reg & (1 << 7))
+			sio_data->skip_fan |= (1 << 2);
+
+		/* Check for pwm2, fan2 */
+		reg = superio_inb(IT87_SIO_GPIO5_REG);
+		if (reg & (1 << 1))
+			sio_data->skip_pwm |= (1 << 1);
+		if (reg & (1 << 2))
+			sio_data->skip_fan |= (1 << 1);
+
+		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	} else {
 		int reg;
 		bool uart6;
@@ -1917,15 +2052,7 @@
 		superio_select(GPIO);
 
 		reg = superio_inb(IT87_SIO_GPIO3_REG);
-		if (sio_data->type == it8721 || sio_data->type == it8728 ||
-		    sio_data->type == it8771 || sio_data->type == it8772 ||
-		    sio_data->type == it8782) {
-			/*
-			 * IT8721F/IT8758E, and IT8782F don't have VID pins
-			 * at all, not sure about the IT8728F and compatibles.
-			 */
-			sio_data->skip_vid = 1;
-		} else {
+		if (!sio_data->skip_vid) {
 			/* We need at least 4 VID pins */
 			if (reg & 0x0f) {
 				pr_info("VID is disabled (pins used for GPIO)\n");
@@ -1975,10 +2102,7 @@
 		}
 		if (reg & (1 << 0))
 			sio_data->internal |= (1 << 0);
-		if ((reg & (1 << 1)) || sio_data->type == it8721 ||
-		    sio_data->type == it8728 ||
-		    sio_data->type == it8771 ||
-		    sio_data->type == it8772)
+		if (reg & (1 << 1))
 			sio_data->internal |= (1 << 1);
 
 		/*
@@ -2050,7 +2174,7 @@
 			sysfs_remove_file(&dev->kobj,
 					  it87_attributes_temp_beep[i]);
 	}
-	for (i = 0; i < 5; i++) {
+	for (i = 0; i < 6; i++) {
 		if (!(data->has_fan & (1 << i)))
 			continue;
 		sysfs_remove_group(&dev->kobj, &it87_group_fan[i]);
@@ -2062,7 +2186,7 @@
 					  it87_attributes_fan_div[i]);
 	}
 	for (i = 0; i < 3; i++) {
-		if (sio_data->skip_pwm & (1 << 0))
+		if (sio_data->skip_pwm & (1 << i))
 			continue;
 		sysfs_remove_group(&dev->kobj, &it87_group_pwm[i]);
 		if (has_old_autopwm(data))
@@ -2112,13 +2236,14 @@
 	case it87:
 		if (sio_data->revision >= 0x03) {
 			data->features &= ~FEAT_OLD_AUTOPWM;
-			data->features |= FEAT_16BIT_FANS;
+			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
 		}
 		break;
 	case it8712:
 		if (sio_data->revision >= 0x08) {
 			data->features &= ~FEAT_OLD_AUTOPWM;
-			data->features |= FEAT_16BIT_FANS;
+			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
+					  FEAT_FIVE_FANS;
 		}
 		break;
 	default:
@@ -2147,7 +2272,8 @@
 			data->in_scaled |= (1 << 8);	/* in8 is Vbat */
 		if (sio_data->internal & (1 << 3))
 			data->in_scaled |= (1 << 9);	/* in9 is AVCC */
-	} else if (sio_data->type == it8782 || sio_data->type == it8783) {
+	} else if (sio_data->type == it8781 || sio_data->type == it8782 ||
+		   sio_data->type == it8783) {
 		if (sio_data->internal & (1 << 0))
 			data->in_scaled |= (1 << 3);	/* in3 is VCC5V */
 		if (sio_data->internal & (1 << 1))
@@ -2205,7 +2331,7 @@
 
 	/* Do not create fan files for disabled fans */
 	fan_beep_need_rw = 1;
-	for (i = 0; i < 5; i++) {
+	for (i = 0; i < 6; i++) {
 		if (!(data->has_fan & (1 << i)))
 			continue;
 		err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]);
@@ -2450,24 +2576,26 @@
 	}
 	data->has_fan = (data->fan_main_ctrl >> 4) & 0x07;
 
-	/* Set tachometers to 16-bit mode if needed, IT8603E (and IT8728F?)
-	 * has it by default */
-	if (has_16bit_fans(data) && data->type != it8603) {
-		tmp = it87_read_value(data, IT87_REG_FAN_16BIT);
+	tmp = it87_read_value(data, IT87_REG_FAN_16BIT);
+
+	/* Set tachometers to 16-bit mode if needed */
+	if (has_fan16_config(data)) {
 		if (~tmp & 0x07 & data->has_fan) {
 			dev_dbg(&pdev->dev,
 				"Setting fan1-3 to 16-bit mode\n");
 			it87_write_value(data, IT87_REG_FAN_16BIT,
 					 tmp | 0x07);
 		}
-		/* IT8705F, IT8782F, and IT8783E/F only support three fans. */
-		if (data->type != it87 && data->type != it8782 &&
-		    data->type != it8783) {
-			if (tmp & (1 << 4))
-				data->has_fan |= (1 << 3); /* fan4 enabled */
-			if (tmp & (1 << 5))
-				data->has_fan |= (1 << 4); /* fan5 enabled */
-		}
+	}
+
+	/* Check for additional fans */
+	if (has_five_fans(data)) {
+		if (tmp & (1 << 4))
+			data->has_fan |= (1 << 3); /* fan4 enabled */
+		if (tmp & (1 << 5))
+			data->has_fan |= (1 << 4); /* fan5 enabled */
+		if (has_six_fans(data) && (tmp & (1 << 2)))
+			data->has_fan |= (1 << 5); /* fan6 enabled */
 	}
 
 	/* Fan input pins may be used for alternative functions */
@@ -2535,7 +2663,7 @@
 		if (data->type == it8603)
 			data->in[9][0] = it87_read_value(data, 0x2f);
 
-		for (i = 0; i < 5; i++) {
+		for (i = 0; i < 6; i++) {
 			/* Skip disabled fans */
 			if (!(data->has_fan & (1 << i)))
 				continue;
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 996bdfd..9887d32 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -87,11 +87,14 @@
 #define AT30TSE004_DEVID_MASK	0xffff
 
 /* IDT */
-#define TS3000B3_DEVID		0x2903  /* Also matches TSE2002B3 */
-#define TS3000B3_DEVID_MASK	0xffff
+#define TSE2004_DEVID		0x2200
+#define TSE2004_DEVID_MASK	0xff00
 
-#define TS3000GB2_DEVID		0x2912  /* Also matches TSE2002GB2 */
-#define TS3000GB2_DEVID_MASK	0xffff
+#define TS3000_DEVID		0x2900  /* Also matches TSE2002 */
+#define TS3000_DEVID_MASK	0xff00
+
+#define TS3001_DEVID		0x3000
+#define TS3001_DEVID_MASK	0xff00
 
 /* Maxim */
 #define MAX6604_DEVID		0x3e00
@@ -152,8 +155,9 @@
 	{ ADT_MANID, ADT7408_DEVID, ADT7408_DEVID_MASK },
 	{ ATMEL_MANID, AT30TS00_DEVID, AT30TS00_DEVID_MASK },
 	{ ATMEL_MANID2, AT30TSE004_DEVID, AT30TSE004_DEVID_MASK },
-	{ IDT_MANID, TS3000B3_DEVID, TS3000B3_DEVID_MASK },
-	{ IDT_MANID, TS3000GB2_DEVID, TS3000GB2_DEVID_MASK },
+	{ IDT_MANID, TSE2004_DEVID, TSE2004_DEVID_MASK },
+	{ IDT_MANID, TS3000_DEVID, TS3000_DEVID_MASK },
+	{ IDT_MANID, TS3001_DEVID, TS3001_DEVID_MASK },
 	{ MAX_MANID, MAX6604_DEVID, MAX6604_DEVID_MASK },
 	{ MCP_MANID, MCP9804_DEVID, MCP9804_DEVID_MASK },
 	{ MCP_MANID, MCP98242_DEVID, MCP98242_DEVID_MASK },
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 1be4117..4fcb481 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -57,6 +57,7 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/io.h>
 #include "lm75.h"
 
@@ -880,12 +881,12 @@
 	u16 have_temp;
 	u16 have_temp_fixed;
 	u16 have_in;
-#ifdef CONFIG_PM
+
 	/* Remember extra register values over suspend/resume */
 	u8 vbat;
 	u8 fandiv1;
 	u8 fandiv2;
-#endif
+	u8 sio_reg_enable;
 };
 
 struct nct6775_sio_data {
@@ -3178,6 +3179,10 @@
 	int sioreg = data->sioreg;
 	int regval;
 
+	/* Store SIO_REG_ENABLE for use during resume */
+	superio_select(sioreg, NCT6775_LD_HWM);
+	data->sio_reg_enable = superio_inb(sioreg, SIO_REG_ENABLE);
+
 	/* fan4 and fan5 share some pins with the GPIO and serial flash */
 	if (data->kind == nct6775) {
 		regval = superio_inb(sioreg, 0x2c);
@@ -3195,21 +3200,38 @@
 		pwm6pin = false;
 	} else if (data->kind == nct6776) {
 		bool gpok = superio_inb(sioreg, 0x27) & 0x80;
+		const char *board_vendor, *board_name;
 
-		superio_select(sioreg, NCT6775_LD_HWM);
-		regval = superio_inb(sioreg, SIO_REG_ENABLE);
+		board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+		board_name = dmi_get_system_info(DMI_BOARD_NAME);
 
-		if (regval & 0x80)
+		if (board_name && board_vendor &&
+		    !strcmp(board_vendor, "ASRock")) {
+			/*
+			 * Auxiliary fan monitoring is not enabled on ASRock
+			 * Z77 Pro4-M if booted in UEFI Ultra-FastBoot mode.
+			 * Observed with BIOS version 2.00.
+			 */
+			if (!strcmp(board_name, "Z77 Pro4-M")) {
+				if ((data->sio_reg_enable & 0xe0) != 0xe0) {
+					data->sio_reg_enable |= 0xe0;
+					superio_outb(sioreg, SIO_REG_ENABLE,
+						     data->sio_reg_enable);
+				}
+			}
+		}
+
+		if (data->sio_reg_enable & 0x80)
 			fan3pin = gpok;
 		else
 			fan3pin = !(superio_inb(sioreg, 0x24) & 0x40);
 
-		if (regval & 0x40)
+		if (data->sio_reg_enable & 0x40)
 			fan4pin = gpok;
 		else
 			fan4pin = superio_inb(sioreg, 0x1C) & 0x01;
 
-		if (regval & 0x20)
+		if (data->sio_reg_enable & 0x20)
 			fan5pin = gpok;
 		else
 			fan5pin = superio_inb(sioreg, 0x1C) & 0x02;
@@ -3989,8 +4011,7 @@
 	}
 }
 
-#ifdef CONFIG_PM
-static int nct6775_suspend(struct device *dev)
+static int __maybe_unused nct6775_suspend(struct device *dev)
 {
 	struct nct6775_data *data = nct6775_update_device(dev);
 
@@ -4005,22 +4026,29 @@
 	return 0;
 }
 
-static int nct6775_resume(struct device *dev)
+static int __maybe_unused nct6775_resume(struct device *dev)
 {
 	struct nct6775_data *data = dev_get_drvdata(dev);
+	int sioreg = data->sioreg;
 	int i, j, err = 0;
+	u8 reg;
 
 	mutex_lock(&data->update_lock);
 	data->bank = 0xff;		/* Force initial bank selection */
 
-	if (data->kind == nct6791 || data->kind == nct6792) {
-		err = superio_enter(data->sioreg);
-		if (err)
-			goto abort;
+	err = superio_enter(sioreg);
+	if (err)
+		goto abort;
 
-		nct6791_enable_io_mapping(data->sioreg);
-		superio_exit(data->sioreg);
-	}
+	superio_select(sioreg, NCT6775_LD_HWM);
+	reg = superio_inb(sioreg, SIO_REG_ENABLE);
+	if (reg != data->sio_reg_enable)
+		superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
+
+	if (data->kind == nct6791 || data->kind == nct6792)
+		nct6791_enable_io_mapping(sioreg);
+
+	superio_exit(sioreg);
 
 	/* Restore limits */
 	for (i = 0; i < data->in_num; i++) {
@@ -4066,22 +4094,12 @@
 	return err;
 }
 
-static const struct dev_pm_ops nct6775_dev_pm_ops = {
-	.suspend = nct6775_suspend,
-	.resume = nct6775_resume,
-	.freeze = nct6775_suspend,
-	.restore = nct6775_resume,
-};
-
-#define NCT6775_DEV_PM_OPS	(&nct6775_dev_pm_ops)
-#else
-#define NCT6775_DEV_PM_OPS	NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
 
 static struct platform_driver nct6775_driver = {
 	.driver = {
 		.name	= DRVNAME,
-		.pm	= NCT6775_DEV_PM_OPS,
+		.pm	= &nct6775_dev_pm_ops,
 	},
 	.probe		= nct6775_probe,
 };
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
new file mode 100644
index 0000000..b77b82f
--- /dev/null
+++ b/drivers/hwmon/nct7904.c
@@ -0,0 +1,593 @@
+/*
+ * nct7904.c - driver for Nuvoton NCT7904D.
+ *
+ * Copyright (c) 2015 Kontron
+ * Author: Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+
+#define VENDOR_ID_REG		0x7A	/* Any bank */
+#define NUVOTON_ID		0x50
+#define CHIP_ID_REG		0x7B	/* Any bank */
+#define NCT7904_ID		0xC5
+#define DEVICE_ID_REG		0x7C	/* Any bank */
+
+#define BANK_SEL_REG		0xFF
+#define BANK_0			0x00
+#define BANK_1			0x01
+#define BANK_2			0x02
+#define BANK_3			0x03
+#define BANK_4			0x04
+#define BANK_MAX		0x04
+
+#define FANIN_MAX		12	/* Counted from 1 */
+#define VSEN_MAX		21	/* VSEN1..14, 3VDD, VBAT, V3VSB,
+					   LTD (not a voltage), VSEN17..19 */
+#define FANCTL_MAX		4	/* Counted from 1 */
+#define TCPU_MAX		8	/* Counted from 1 */
+#define TEMP_MAX		4	/* Counted from 1 */
+
+#define VT_ADC_CTRL0_REG	0x20	/* Bank 0 */
+#define VT_ADC_CTRL1_REG	0x21	/* Bank 0 */
+#define VT_ADC_CTRL2_REG	0x22	/* Bank 0 */
+#define FANIN_CTRL0_REG		0x24
+#define FANIN_CTRL1_REG		0x25
+#define DTS_T_CTRL0_REG		0x26
+#define DTS_T_CTRL1_REG		0x27
+#define VT_ADC_MD_REG		0x2E
+
+#define VSEN1_HV_REG		0x40	/* Bank 0; 2 regs (HV/LV) per sensor */
+#define TEMP_CH1_HV_REG		0x42	/* Bank 0; same as VSEN2_HV */
+#define LTD_HV_REG		0x62	/* Bank 0; 2 regs in VSEN range */
+#define FANIN1_HV_REG		0x80	/* Bank 0; 2 regs (HV/LV) per sensor */
+#define T_CPU1_HV_REG		0xA0	/* Bank 0; 2 regs (HV/LV) per sensor */
+
+#define PRTS_REG		0x03	/* Bank 2 */
+#define FANCTL1_FMR_REG		0x00	/* Bank 3; 1 reg per channel */
+#define FANCTL1_OUT_REG		0x10	/* Bank 3; 1 reg per channel */
+
+static const unsigned short normal_i2c[] = {
+	0x2d, 0x2e, I2C_CLIENT_END
+};
+
+struct nct7904_data {
+	struct i2c_client *client;
+	struct mutex bank_lock;
+	int bank_sel;
+	u32 fanin_mask;
+	u32 vsen_mask;
+	u32 tcpu_mask;
+	u8 fan_mode[FANCTL_MAX];
+};
+
+/* Access functions */
+static int nct7904_bank_lock(struct nct7904_data *data, unsigned bank)
+{
+	int ret;
+
+	mutex_lock(&data->bank_lock);
+	if (data->bank_sel == bank)
+		return 0;
+	ret = i2c_smbus_write_byte_data(data->client, BANK_SEL_REG, bank);
+	if (ret == 0)
+		data->bank_sel = bank;
+	else
+		data->bank_sel = -1;
+	return ret;
+}
+
+static inline void nct7904_bank_release(struct nct7904_data *data)
+{
+	mutex_unlock(&data->bank_lock);
+}
+
+/* Read 1-byte register. Returns unsigned reg or -ERRNO on error. */
+static int nct7904_read_reg(struct nct7904_data *data,
+			    unsigned bank, unsigned reg)
+{
+	struct i2c_client *client = data->client;
+	int ret;
+
+	ret = nct7904_bank_lock(data, bank);
+	if (ret == 0)
+		ret = i2c_smbus_read_byte_data(client, reg);
+
+	nct7904_bank_release(data);
+	return ret;
+}
+
+/*
+ * Read 2-byte register. Returns register in big-endian format or
+ * -ERRNO on error.
+ */
+static int nct7904_read_reg16(struct nct7904_data *data,
+			      unsigned bank, unsigned reg)
+{
+	struct i2c_client *client = data->client;
+	int ret, hi;
+
+	ret = nct7904_bank_lock(data, bank);
+	if (ret == 0) {
+		ret = i2c_smbus_read_byte_data(client, reg);
+		if (ret >= 0) {
+			hi = ret;
+			ret = i2c_smbus_read_byte_data(client, reg + 1);
+			if (ret >= 0)
+				ret |= hi << 8;
+		}
+	}
+
+	nct7904_bank_release(data);
+	return ret;
+}
+
+/* Write 1-byte register. Returns 0 or -ERRNO on error. */
+static int nct7904_write_reg(struct nct7904_data *data,
+			     unsigned bank, unsigned reg, u8 val)
+{
+	struct i2c_client *client = data->client;
+	int ret;
+
+	ret = nct7904_bank_lock(data, bank);
+	if (ret == 0)
+		ret = i2c_smbus_write_byte_data(client, reg, val);
+
+	nct7904_bank_release(data);
+	return ret;
+}
+
+/* FANIN ATTR */
+static ssize_t show_fan(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int ret;
+	unsigned cnt, rpm;
+
+	ret = nct7904_read_reg16(data, BANK_0, FANIN1_HV_REG + index * 2);
+	if (ret < 0)
+		return ret;
+	cnt = ((ret & 0xff00) >> 3) | (ret & 0x1f);
+	if (cnt == 0x1fff)
+		rpm = 0;
+	else
+		rpm = 1350000 / cnt;
+	return sprintf(buf, "%u\n", rpm);
+}
+
+static umode_t nct7904_fanin_is_visible(struct kobject *kobj,
+					struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nct7904_data *data = dev_get_drvdata(dev);
+
+	if (data->fanin_mask & (1 << n))
+		return a->mode;
+	return 0;
+}
+
+static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, show_fan, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, show_fan, NULL, 1);
+static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, show_fan, NULL, 2);
+static SENSOR_DEVICE_ATTR(fan4_input, S_IRUGO, show_fan, NULL, 3);
+static SENSOR_DEVICE_ATTR(fan5_input, S_IRUGO, show_fan, NULL, 4);
+static SENSOR_DEVICE_ATTR(fan6_input, S_IRUGO, show_fan, NULL, 5);
+static SENSOR_DEVICE_ATTR(fan7_input, S_IRUGO, show_fan, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan8_input, S_IRUGO, show_fan, NULL, 7);
+static SENSOR_DEVICE_ATTR(fan9_input, S_IRUGO, show_fan, NULL, 8);
+static SENSOR_DEVICE_ATTR(fan10_input, S_IRUGO, show_fan, NULL, 9);
+static SENSOR_DEVICE_ATTR(fan11_input, S_IRUGO, show_fan, NULL, 10);
+static SENSOR_DEVICE_ATTR(fan12_input, S_IRUGO, show_fan, NULL, 11);
+
+static struct attribute *nct7904_fanin_attrs[] = {
+	&sensor_dev_attr_fan1_input.dev_attr.attr,
+	&sensor_dev_attr_fan2_input.dev_attr.attr,
+	&sensor_dev_attr_fan3_input.dev_attr.attr,
+	&sensor_dev_attr_fan4_input.dev_attr.attr,
+	&sensor_dev_attr_fan5_input.dev_attr.attr,
+	&sensor_dev_attr_fan6_input.dev_attr.attr,
+	&sensor_dev_attr_fan7_input.dev_attr.attr,
+	&sensor_dev_attr_fan8_input.dev_attr.attr,
+	&sensor_dev_attr_fan9_input.dev_attr.attr,
+	&sensor_dev_attr_fan10_input.dev_attr.attr,
+	&sensor_dev_attr_fan11_input.dev_attr.attr,
+	&sensor_dev_attr_fan12_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group nct7904_fanin_group = {
+	.attrs = nct7904_fanin_attrs,
+	.is_visible = nct7904_fanin_is_visible,
+};
+
+/* VSEN ATTR */
+static ssize_t show_voltage(struct device *dev,
+			    struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int ret;
+	int volt;
+
+	ret = nct7904_read_reg16(data, BANK_0, VSEN1_HV_REG + index * 2);
+	if (ret < 0)
+		return ret;
+	volt = ((ret & 0xff00) >> 5) | (ret & 0x7);
+	if (index < 14)
+		volt *= 2; /* 0.002V scale */
+	else
+		volt *= 6; /* 0.006V scale */
+
+	return sprintf(buf, "%d\n", volt);
+}
+
+static ssize_t show_ltemp(struct device *dev,
+			  struct device_attribute *devattr, char *buf)
+{
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int ret;
+	int temp;
+
+	ret = nct7904_read_reg16(data, BANK_0, LTD_HV_REG);
+	if (ret < 0)
+		return ret;
+	temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
+	temp = sign_extend32(temp, 10) * 125;
+
+	return sprintf(buf, "%d\n", temp);
+}
+
+static umode_t nct7904_vsen_is_visible(struct kobject *kobj,
+				       struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nct7904_data *data = dev_get_drvdata(dev);
+
+	if (data->vsen_mask & (1 << n))
+		return a->mode;
+	return 0;
+}
+
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 0);
+static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 1);
+static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, show_voltage, NULL, 2);
+static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, show_voltage, NULL, 3);
+static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 4);
+static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 5);
+static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 6);
+static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, show_voltage, NULL, 7);
+static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, show_voltage, NULL, 8);
+static SENSOR_DEVICE_ATTR(in10_input, S_IRUGO, show_voltage, NULL, 9);
+static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, show_voltage, NULL, 10);
+static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, show_voltage, NULL, 11);
+static SENSOR_DEVICE_ATTR(in13_input, S_IRUGO, show_voltage, NULL, 12);
+static SENSOR_DEVICE_ATTR(in14_input, S_IRUGO, show_voltage, NULL, 13);
+/*
+ * Next 3 voltage sensors have specific names in the Nuvoton doc
+ * (3VDD, VBAT, 3VSB) but we use vacant numbers for them.
+ */
+static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, show_voltage, NULL, 14);
+static SENSOR_DEVICE_ATTR(in16_input, S_IRUGO, show_voltage, NULL, 15);
+static SENSOR_DEVICE_ATTR(in20_input, S_IRUGO, show_voltage, NULL, 16);
+/* This is not a voltage, but a local temperature sensor. */
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_ltemp, NULL, 0);
+static SENSOR_DEVICE_ATTR(in17_input, S_IRUGO, show_voltage, NULL, 18);
+static SENSOR_DEVICE_ATTR(in18_input, S_IRUGO, show_voltage, NULL, 19);
+static SENSOR_DEVICE_ATTR(in19_input, S_IRUGO, show_voltage, NULL, 20);
+
+static struct attribute *nct7904_vsen_attrs[] = {
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	&sensor_dev_attr_in2_input.dev_attr.attr,
+	&sensor_dev_attr_in3_input.dev_attr.attr,
+	&sensor_dev_attr_in4_input.dev_attr.attr,
+	&sensor_dev_attr_in5_input.dev_attr.attr,
+	&sensor_dev_attr_in6_input.dev_attr.attr,
+	&sensor_dev_attr_in7_input.dev_attr.attr,
+	&sensor_dev_attr_in8_input.dev_attr.attr,
+	&sensor_dev_attr_in9_input.dev_attr.attr,
+	&sensor_dev_attr_in10_input.dev_attr.attr,
+	&sensor_dev_attr_in11_input.dev_attr.attr,
+	&sensor_dev_attr_in12_input.dev_attr.attr,
+	&sensor_dev_attr_in13_input.dev_attr.attr,
+	&sensor_dev_attr_in14_input.dev_attr.attr,
+	&sensor_dev_attr_in15_input.dev_attr.attr,
+	&sensor_dev_attr_in16_input.dev_attr.attr,
+	&sensor_dev_attr_in20_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_in17_input.dev_attr.attr,
+	&sensor_dev_attr_in18_input.dev_attr.attr,
+	&sensor_dev_attr_in19_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group nct7904_vsen_group = {
+	.attrs = nct7904_vsen_attrs,
+	.is_visible = nct7904_vsen_is_visible,
+};
+
+/* CPU_TEMP ATTR */
+static ssize_t show_tcpu(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int ret;
+	int temp;
+
+	ret = nct7904_read_reg16(data, BANK_0, T_CPU1_HV_REG + index * 2);
+	if (ret < 0)
+		return ret;
+
+	temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
+	temp = sign_extend32(temp, 10) * 125;
+	return sprintf(buf, "%d\n", temp);
+}
+
+static umode_t nct7904_tcpu_is_visible(struct kobject *kobj,
+				       struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nct7904_data *data = dev_get_drvdata(dev);
+
+	if (data->tcpu_mask & (1 << n))
+		return a->mode;
+	return 0;
+}
+
+/* "temp1_input" reserved for local temp */
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_tcpu, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_tcpu, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_tcpu, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_tcpu, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_tcpu, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_tcpu, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_tcpu, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp9_input, S_IRUGO, show_tcpu, NULL, 7);
+
+static struct attribute *nct7904_tcpu_attrs[] = {
+	&sensor_dev_attr_temp2_input.dev_attr.attr,
+	&sensor_dev_attr_temp3_input.dev_attr.attr,
+	&sensor_dev_attr_temp4_input.dev_attr.attr,
+	&sensor_dev_attr_temp5_input.dev_attr.attr,
+	&sensor_dev_attr_temp6_input.dev_attr.attr,
+	&sensor_dev_attr_temp7_input.dev_attr.attr,
+	&sensor_dev_attr_temp8_input.dev_attr.attr,
+	&sensor_dev_attr_temp9_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group nct7904_tcpu_group = {
+	.attrs = nct7904_tcpu_attrs,
+	.is_visible = nct7904_tcpu_is_visible,
+};
+
+/* PWM ATTR */
+static ssize_t store_pwm(struct device *dev, struct device_attribute *devattr,
+			 const char *buf, size_t count)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+	if (val > 255)
+		return -EINVAL;
+
+	ret = nct7904_write_reg(data, BANK_3, FANCTL1_OUT_REG + index, val);
+
+	return ret ? ret : count;
+}
+
+static ssize_t show_pwm(struct device *dev,
+			struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int val;
+
+	val = nct7904_read_reg(data, BANK_3, FANCTL1_OUT_REG + index);
+	if (val < 0)
+		return val;
+
+	return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t store_mode(struct device *dev, struct device_attribute *devattr,
+			  const char *buf, size_t count)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+	if (val > 1 || (val && !data->fan_mode[index]))
+		return -EINVAL;
+
+	ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index,
+				val ? data->fan_mode[index] : 0);
+
+	return ret ? ret : count;
+}
+
+/* Return 0 for manual mode or 1 for SmartFan mode */
+static ssize_t show_mode(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct nct7904_data *data = dev_get_drvdata(dev);
+	int val;
+
+	val = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + index);
+	if (val < 0)
+		return val;
+
+	return sprintf(buf, "%d\n", val ? 1 : 0);
+}
+
+/* 2 attributes per channel: pwm and mode */
+static SENSOR_DEVICE_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
+			show_pwm, store_pwm, 0);
+static SENSOR_DEVICE_ATTR(fan1_mode, S_IRUGO | S_IWUSR,
+			show_mode, store_mode, 0);
+static SENSOR_DEVICE_ATTR(fan2_pwm, S_IRUGO | S_IWUSR,
+			show_pwm, store_pwm, 1);
+static SENSOR_DEVICE_ATTR(fan2_mode, S_IRUGO | S_IWUSR,
+			show_mode, store_mode, 1);
+static SENSOR_DEVICE_ATTR(fan3_pwm, S_IRUGO | S_IWUSR,
+			show_pwm, store_pwm, 2);
+static SENSOR_DEVICE_ATTR(fan3_mode, S_IRUGO | S_IWUSR,
+			show_mode, store_mode, 2);
+static SENSOR_DEVICE_ATTR(fan4_pwm, S_IRUGO | S_IWUSR,
+			show_pwm, store_pwm, 3);
+static SENSOR_DEVICE_ATTR(fan4_mode, S_IRUGO | S_IWUSR,
+			show_mode, store_mode, 3);
+
+static struct attribute *nct7904_fanctl_attrs[] = {
+	&sensor_dev_attr_fan1_pwm.dev_attr.attr,
+	&sensor_dev_attr_fan1_mode.dev_attr.attr,
+	&sensor_dev_attr_fan2_pwm.dev_attr.attr,
+	&sensor_dev_attr_fan2_mode.dev_attr.attr,
+	&sensor_dev_attr_fan3_pwm.dev_attr.attr,
+	&sensor_dev_attr_fan3_mode.dev_attr.attr,
+	&sensor_dev_attr_fan4_pwm.dev_attr.attr,
+	&sensor_dev_attr_fan4_mode.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group nct7904_fanctl_group = {
+	.attrs = nct7904_fanctl_attrs,
+};
+
+static const struct attribute_group *nct7904_groups[] = {
+	&nct7904_fanin_group,
+	&nct7904_vsen_group,
+	&nct7904_tcpu_group,
+	&nct7904_fanctl_group,
+	NULL
+};
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int nct7904_detect(struct i2c_client *client,
+			  struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+
+	if (!i2c_check_functionality(adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE |
+				     I2C_FUNC_SMBUS_WRITE_BYTE_DATA))
+		return -ENODEV;
+
+	/* Determine the chip type. */
+	if (i2c_smbus_read_byte_data(client, VENDOR_ID_REG) != NUVOTON_ID ||
+	    i2c_smbus_read_byte_data(client, CHIP_ID_REG) != NCT7904_ID ||
+	    (i2c_smbus_read_byte_data(client, DEVICE_ID_REG) & 0xf0) != 0x50 ||
+	    (i2c_smbus_read_byte_data(client, BANK_SEL_REG) & 0xf8) != 0x00)
+		return -ENODEV;
+
+	strlcpy(info->type, "nct7904", I2C_NAME_SIZE);
+
+	return 0;
+}
+
+static int nct7904_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct nct7904_data *data;
+	struct device *hwmon_dev;
+	struct device *dev = &client->dev;
+	int ret, i;
+	u32 mask;
+
+	data = devm_kzalloc(dev, sizeof(struct nct7904_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->client = client;
+	mutex_init(&data->bank_lock);
+	data->bank_sel = -1;
+
+	/* Setup sensor groups. */
+	/* FANIN attributes */
+	ret = nct7904_read_reg16(data, BANK_0, FANIN_CTRL0_REG);
+	if (ret < 0)
+		return ret;
+	data->fanin_mask = (ret >> 8) | ((ret & 0xff) << 8);
+
+	/*
+	 * VSEN attributes
+	 *
+	 * Note: voltage sensors overlap with external temperature
+	 * sensors. So, if we ever decide to support the latter
+	 * we will have to adjust 'vsen_mask' accordingly.
+	 */
+	mask = 0;
+	ret = nct7904_read_reg16(data, BANK_0, VT_ADC_CTRL0_REG);
+	if (ret >= 0)
+		mask = (ret >> 8) | ((ret & 0xff) << 8);
+	ret = nct7904_read_reg(data, BANK_0, VT_ADC_CTRL2_REG);
+	if (ret >= 0)
+		mask |= (ret << 16);
+	data->vsen_mask = mask;
+
+	/* CPU_TEMP attributes */
+	ret = nct7904_read_reg16(data, BANK_0, DTS_T_CTRL0_REG);
+	if (ret < 0)
+		return ret;
+	data->tcpu_mask = ((ret >> 8) & 0xf) | ((ret & 0xf) << 4);
+
+	for (i = 0; i < FANCTL_MAX; i++) {
+		ret = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + i);
+		if (ret < 0)
+			return ret;
+		data->fan_mode[i] = ret;
+	}
+
+	hwmon_dev =
+		devm_hwmon_device_register_with_groups(dev, client->name, data,
+						       nct7904_groups);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static const struct i2c_device_id nct7904_id[] = {
+	{"nct7904", 0},
+	{}
+};
+
+static struct i2c_driver nct7904_driver = {
+	.class = I2C_CLASS_HWMON,
+	.driver = {
+		.name = "nct7904",
+	},
+	.probe = nct7904_probe,
+	.id_table = nct7904_id,
+	.detect = nct7904_detect,
+	.address_list = normal_i2c,
+};
+
+module_i2c_driver(nct7904_driver);
+
+MODULE_AUTHOR("Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>");
+MODULE_DESCRIPTION("Hwmon driver for NUVOTON NCT7904");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c
index 1991d903..2d9a712 100644
--- a/drivers/hwmon/pwm-fan.c
+++ b/drivers/hwmon/pwm-fan.c
@@ -24,55 +24,78 @@
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/sysfs.h>
+#include <linux/thermal.h>
 
 #define MAX_PWM 255
 
 struct pwm_fan_ctx {
 	struct mutex lock;
 	struct pwm_device *pwm;
-	unsigned char pwm_value;
+	unsigned int pwm_value;
+	unsigned int pwm_fan_state;
+	unsigned int pwm_fan_max_state;
+	unsigned int *pwm_fan_cooling_levels;
+	struct thermal_cooling_device *cdev;
 };
 
-static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
-		       const char *buf, size_t count)
+static int  __set_pwm(struct pwm_fan_ctx *ctx, unsigned long pwm)
 {
-	struct pwm_fan_ctx *ctx = dev_get_drvdata(dev);
-	unsigned long pwm, duty;
-	ssize_t ret;
-
-	if (kstrtoul(buf, 10, &pwm) || pwm > MAX_PWM)
-		return -EINVAL;
+	unsigned long duty;
+	int ret = 0;
 
 	mutex_lock(&ctx->lock);
-
 	if (ctx->pwm_value == pwm)
-		goto exit_set_pwm_no_change;
-
-	if (pwm == 0) {
-		pwm_disable(ctx->pwm);
-		goto exit_set_pwm;
-	}
+		goto exit_set_pwm_err;
 
 	duty = DIV_ROUND_UP(pwm * (ctx->pwm->period - 1), MAX_PWM);
 	ret = pwm_config(ctx->pwm, duty, ctx->pwm->period);
 	if (ret)
 		goto exit_set_pwm_err;
 
+	if (pwm == 0)
+		pwm_disable(ctx->pwm);
+
 	if (ctx->pwm_value == 0) {
 		ret = pwm_enable(ctx->pwm);
 		if (ret)
 			goto exit_set_pwm_err;
 	}
 
-exit_set_pwm:
 	ctx->pwm_value = pwm;
-exit_set_pwm_no_change:
-	ret = count;
 exit_set_pwm_err:
 	mutex_unlock(&ctx->lock);
 	return ret;
 }
 
+static void pwm_fan_update_state(struct pwm_fan_ctx *ctx, unsigned long pwm)
+{
+	int i;
+
+	for (i = 0; i < ctx->pwm_fan_max_state; ++i)
+		if (pwm < ctx->pwm_fan_cooling_levels[i + 1])
+			break;
+
+	ctx->pwm_fan_state = i;
+}
+
+static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct pwm_fan_ctx *ctx = dev_get_drvdata(dev);
+	unsigned long pwm;
+	int ret;
+
+	if (kstrtoul(buf, 10, &pwm) || pwm > MAX_PWM)
+		return -EINVAL;
+
+	ret = __set_pwm(ctx, pwm);
+	if (ret)
+		return ret;
+
+	pwm_fan_update_state(ctx, pwm);
+	return count;
+}
+
 static ssize_t show_pwm(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
@@ -91,10 +114,108 @@
 
 ATTRIBUTE_GROUPS(pwm_fan);
 
+/* thermal cooling device callbacks */
+static int pwm_fan_get_max_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	struct pwm_fan_ctx *ctx = cdev->devdata;
+
+	if (!ctx)
+		return -EINVAL;
+
+	*state = ctx->pwm_fan_max_state;
+
+	return 0;
+}
+
+static int pwm_fan_get_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	struct pwm_fan_ctx *ctx = cdev->devdata;
+
+	if (!ctx)
+		return -EINVAL;
+
+	*state = ctx->pwm_fan_state;
+
+	return 0;
+}
+
+static int
+pwm_fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+	struct pwm_fan_ctx *ctx = cdev->devdata;
+	int ret;
+
+	if (!ctx || (state > ctx->pwm_fan_max_state))
+		return -EINVAL;
+
+	if (state == ctx->pwm_fan_state)
+		return 0;
+
+	ret = __set_pwm(ctx, ctx->pwm_fan_cooling_levels[state]);
+	if (ret) {
+		dev_err(&cdev->device, "Cannot set pwm!\n");
+		return ret;
+	}
+
+	ctx->pwm_fan_state = state;
+
+	return ret;
+}
+
+static const struct thermal_cooling_device_ops pwm_fan_cooling_ops = {
+	.get_max_state = pwm_fan_get_max_state,
+	.get_cur_state = pwm_fan_get_cur_state,
+	.set_cur_state = pwm_fan_set_cur_state,
+};
+
+static int pwm_fan_of_get_cooling_data(struct device *dev,
+				       struct pwm_fan_ctx *ctx)
+{
+	struct device_node *np = dev->of_node;
+	int num, i, ret;
+
+	if (!of_find_property(np, "cooling-levels", NULL))
+		return 0;
+
+	ret = of_property_count_u32_elems(np, "cooling-levels");
+	if (ret <= 0) {
+		dev_err(dev, "Wrong data!\n");
+		return ret ? : -EINVAL;
+	}
+
+	num = ret;
+	ctx->pwm_fan_cooling_levels = devm_kzalloc(dev, num * sizeof(u32),
+						   GFP_KERNEL);
+	if (!ctx->pwm_fan_cooling_levels)
+		return -ENOMEM;
+
+	ret = of_property_read_u32_array(np, "cooling-levels",
+					 ctx->pwm_fan_cooling_levels, num);
+	if (ret) {
+		dev_err(dev, "Property 'cooling-levels' cannot be read!\n");
+		return ret;
+	}
+
+	for (i = 0; i < num; i++) {
+		if (ctx->pwm_fan_cooling_levels[i] > MAX_PWM) {
+			dev_err(dev, "PWM fan state[%d]:%d > %d\n", i,
+				ctx->pwm_fan_cooling_levels[i], MAX_PWM);
+			return -EINVAL;
+		}
+	}
+
+	ctx->pwm_fan_max_state = num - 1;
+
+	return 0;
+}
+
 static int pwm_fan_probe(struct platform_device *pdev)
 {
-	struct device *hwmon;
+	struct thermal_cooling_device *cdev;
 	struct pwm_fan_ctx *ctx;
+	struct device *hwmon;
 	int duty_cycle;
 	int ret;
 
@@ -136,6 +257,26 @@
 		pwm_disable(ctx->pwm);
 		return PTR_ERR(hwmon);
 	}
+
+	ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx);
+	if (ret)
+		return ret;
+
+	ctx->pwm_fan_state = ctx->pwm_fan_max_state;
+	if (IS_ENABLED(CONFIG_THERMAL)) {
+		cdev = thermal_of_cooling_device_register(pdev->dev.of_node,
+							  "pwm-fan", ctx,
+							  &pwm_fan_cooling_ops);
+		if (IS_ERR(cdev)) {
+			dev_err(&pdev->dev,
+				"Failed to register pwm-fan as cooling device");
+			pwm_disable(ctx->pwm);
+			return PTR_ERR(cdev);
+		}
+		ctx->cdev = cdev;
+		thermal_cdev_update(cdev);
+	}
+
 	return 0;
 }
 
@@ -143,6 +284,7 @@
 {
 	struct pwm_fan_ctx *ctx = platform_get_drvdata(pdev);
 
+	thermal_cooling_device_unregister(ctx->cdev);
 	if (ctx->pwm_value)
 		pwm_disable(ctx->pwm);
 	return 0;
@@ -177,7 +319,7 @@
 
 static SIMPLE_DEV_PM_OPS(pwm_fan_pm, pwm_fan_suspend, pwm_fan_resume);
 
-static struct of_device_id of_pwm_fan_match[] = {
+static const struct of_device_id of_pwm_fan_match[] = {
 	{ .compatible = "pwm-fan", },
 	{},
 };
diff --git a/drivers/hwmon/vexpress.c b/drivers/hwmon/vexpress.c
index cf1848b..8ba419d 100644
--- a/drivers/hwmon/vexpress.c
+++ b/drivers/hwmon/vexpress.c
@@ -193,7 +193,7 @@
 	},
 };
 
-static struct of_device_id vexpress_hwmon_of_match[] = {
+static const struct of_device_id vexpress_hwmon_of_match[] = {
 #if !defined(CONFIG_REGULATOR_VEXPRESS)
 	{
 		.compatible = "arm,vexpress-volt",
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index d9c9829..e1180fa 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -148,8 +148,8 @@
 			printk(KERN_CONT "DataRequest ");
 		if (stat & ATA_CORR)
 			printk(KERN_CONT "CorrectedError ");
-		if (stat & ATA_IDX)
-			printk(KERN_CONT "Index ");
+		if (stat & ATA_SENSE)
+			printk(KERN_CONT "Sense ");
 		if (stat & ATA_ERR)
 			printk(KERN_CONT "Error ");
 	}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a3d3b17..0b63fac 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -273,7 +273,7 @@
 	    (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) {
 		a = tp_ops->read_altstatus(hwif);
 		s = tp_ops->read_status(hwif);
-		if ((a ^ s) & ~ATA_IDX)
+		if ((a ^ s) & ~ATA_SENSE)
 			/* ancient Seagate drives, broken interfaces */
 			printk(KERN_INFO "%s: probing with STATUS(0x%02x) "
 					 "instead of ALTSTATUS(0x%02x)\n",
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b0e5852..5c979d0 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -55,7 +55,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
@@ -638,12 +638,12 @@
 		leave_mm(cpu);
 
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		tick_broadcast_enter();
 
 	mwait_idle_with_hints(eax, ecx);
 
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		tick_broadcast_exit();
 
 	return index;
 }
@@ -665,13 +665,12 @@
 
 static void __setup_broadcast_timer(void *arg)
 {
-	unsigned long reason = (unsigned long)arg;
-	int cpu = smp_processor_id();
+	unsigned long on = (unsigned long)arg;
 
-	reason = reason ?
-		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
-
-	clockevents_notify(reason, &cpu);
+	if (on)
+		tick_broadcast_enable();
+	else
+		tick_broadcast_disable();
 }
 
 static int cpu_hotplug_notify(struct notifier_block *n,
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 46379b1..d82af14 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -135,6 +135,15 @@
 	  device. Depending on platform configuration, this general purpose ADC can
 	  be used for sampling sensors such as thermal resistors.
 
+config DA9150_GPADC
+	tristate "Dialog DA9150 GPADC driver support"
+	depends on MFD_DA9150
+	help
+	  Say yes here to build support for Dialog DA9150 GPADC.
+
+	  This driver can also be built as a module. If chosen, the module name
+	  will be da9150-gpadc.
+
 config CC10001_ADC
 	tristate "Cosmic Circuits 10001 ADC driver"
 	depends on HAVE_CLK || REGULATOR
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 0315af6..3930e63 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -15,6 +15,7 @@
 obj-$(CONFIG_AD799X) += ad799x.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_AXP288_ADC) += axp288_adc.o
+obj-$(CONFIG_DA9150_GPADC) += da9150-gpadc.o
 obj-$(CONFIG_CC10001_ADC) += cc10001_adc.o
 obj-$(CONFIG_EXYNOS_ADC) += exynos_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
diff --git a/drivers/iio/adc/da9150-gpadc.c b/drivers/iio/adc/da9150-gpadc.c
new file mode 100644
index 0000000..3445107
--- /dev/null
+++ b/drivers/iio/adc/da9150-gpadc.c
@@ -0,0 +1,407 @@
+/*
+ * DA9150 GPADC Driver
+ *
+ * Copyright (c) 2014 Dialog Semiconductor
+ *
+ * Author: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/machine.h>
+#include <linux/iio/driver.h>
+#include <linux/mfd/da9150/core.h>
+#include <linux/mfd/da9150/registers.h>
+
+/* Channels */
+enum da9150_gpadc_hw_channel {
+	DA9150_GPADC_HW_CHAN_GPIOA_2V = 0,
+	DA9150_GPADC_HW_CHAN_GPIOA_2V_,
+	DA9150_GPADC_HW_CHAN_GPIOB_2V,
+	DA9150_GPADC_HW_CHAN_GPIOB_2V_,
+	DA9150_GPADC_HW_CHAN_GPIOC_2V,
+	DA9150_GPADC_HW_CHAN_GPIOC_2V_,
+	DA9150_GPADC_HW_CHAN_GPIOD_2V,
+	DA9150_GPADC_HW_CHAN_GPIOD_2V_,
+	DA9150_GPADC_HW_CHAN_IBUS_SENSE,
+	DA9150_GPADC_HW_CHAN_IBUS_SENSE_,
+	DA9150_GPADC_HW_CHAN_VBUS_DIV,
+	DA9150_GPADC_HW_CHAN_VBUS_DIV_,
+	DA9150_GPADC_HW_CHAN_ID,
+	DA9150_GPADC_HW_CHAN_ID_,
+	DA9150_GPADC_HW_CHAN_VSYS,
+	DA9150_GPADC_HW_CHAN_VSYS_,
+	DA9150_GPADC_HW_CHAN_GPIOA_6V,
+	DA9150_GPADC_HW_CHAN_GPIOA_6V_,
+	DA9150_GPADC_HW_CHAN_GPIOB_6V,
+	DA9150_GPADC_HW_CHAN_GPIOB_6V_,
+	DA9150_GPADC_HW_CHAN_GPIOC_6V,
+	DA9150_GPADC_HW_CHAN_GPIOC_6V_,
+	DA9150_GPADC_HW_CHAN_GPIOD_6V,
+	DA9150_GPADC_HW_CHAN_GPIOD_6V_,
+	DA9150_GPADC_HW_CHAN_VBAT,
+	DA9150_GPADC_HW_CHAN_VBAT_,
+	DA9150_GPADC_HW_CHAN_TBAT,
+	DA9150_GPADC_HW_CHAN_TBAT_,
+	DA9150_GPADC_HW_CHAN_TJUNC_CORE,
+	DA9150_GPADC_HW_CHAN_TJUNC_CORE_,
+	DA9150_GPADC_HW_CHAN_TJUNC_OVP,
+	DA9150_GPADC_HW_CHAN_TJUNC_OVP_,
+};
+
+enum da9150_gpadc_channel {
+	DA9150_GPADC_CHAN_GPIOA = 0,
+	DA9150_GPADC_CHAN_GPIOB,
+	DA9150_GPADC_CHAN_GPIOC,
+	DA9150_GPADC_CHAN_GPIOD,
+	DA9150_GPADC_CHAN_IBUS,
+	DA9150_GPADC_CHAN_VBUS,
+	DA9150_GPADC_CHAN_VSYS,
+	DA9150_GPADC_CHAN_VBAT,
+	DA9150_GPADC_CHAN_TBAT,
+	DA9150_GPADC_CHAN_TJUNC_CORE,
+	DA9150_GPADC_CHAN_TJUNC_OVP,
+};
+
+/* Private data */
+struct da9150_gpadc {
+	struct da9150 *da9150;
+	struct device *dev;
+
+	struct mutex lock;
+	struct completion complete;
+};
+
+
+static irqreturn_t da9150_gpadc_irq(int irq, void *data)
+{
+
+	struct da9150_gpadc *gpadc = data;
+
+	complete(&gpadc->complete);
+
+	return IRQ_HANDLED;
+}
+
+static int da9150_gpadc_read_adc(struct da9150_gpadc *gpadc, int hw_chan)
+{
+	u8 result_regs[2];
+	int result;
+
+	mutex_lock(&gpadc->lock);
+
+	/* Set channel & enable measurement */
+	da9150_reg_write(gpadc->da9150, DA9150_GPADC_MAN,
+			 (DA9150_GPADC_EN_MASK |
+			  hw_chan << DA9150_GPADC_MUX_SHIFT));
+
+	/* Consume left-over completion from a previous timeout */
+	try_wait_for_completion(&gpadc->complete);
+
+	/* Check for actual completion */
+	wait_for_completion_timeout(&gpadc->complete, msecs_to_jiffies(5));
+
+	/* Read result and status from device */
+	da9150_bulk_read(gpadc->da9150, DA9150_GPADC_RES_A, 2, result_regs);
+
+	mutex_unlock(&gpadc->lock);
+
+	/* Check to make sure device really has completed reading */
+	if (result_regs[1] & DA9150_GPADC_RUN_MASK) {
+		dev_err(gpadc->dev, "Timeout on channel %d of GPADC\n",
+			hw_chan);
+		return -ETIMEDOUT;
+	}
+
+	/* LSBs - 2 bits */
+	result = (result_regs[1] & DA9150_GPADC_RES_L_MASK) >>
+		 DA9150_GPADC_RES_L_SHIFT;
+	/* MSBs - 8 bits */
+	result |= result_regs[0] << DA9150_GPADC_RES_L_BITS;
+
+	return result;
+}
+
+static inline int da9150_gpadc_gpio_6v_voltage_now(int raw_val)
+{
+	/* Convert to mV */
+	return (6 * ((raw_val * 1000) + 500)) / 1024;
+}
+
+static inline int da9150_gpadc_ibus_current_avg(int raw_val)
+{
+	/* Convert to mA */
+	return (4 * ((raw_val * 1000) + 500)) / 2048;
+}
+
+static inline int da9150_gpadc_vbus_21v_voltage_now(int raw_val)
+{
+	/* Convert to mV */
+	return (21 * ((raw_val * 1000) + 500)) / 1024;
+}
+
+static inline int da9150_gpadc_vsys_6v_voltage_now(int raw_val)
+{
+	/* Convert to mV */
+	return (3 * ((raw_val * 1000) + 500)) / 512;
+}
+
+static int da9150_gpadc_read_processed(struct da9150_gpadc *gpadc, int channel,
+				       int hw_chan, int *val)
+{
+	int raw_val;
+
+	raw_val = da9150_gpadc_read_adc(gpadc, hw_chan);
+	if (raw_val < 0)
+		return raw_val;
+
+	switch (channel) {
+	case DA9150_GPADC_CHAN_GPIOA:
+	case DA9150_GPADC_CHAN_GPIOB:
+	case DA9150_GPADC_CHAN_GPIOC:
+	case DA9150_GPADC_CHAN_GPIOD:
+		*val = da9150_gpadc_gpio_6v_voltage_now(raw_val);
+		break;
+	case DA9150_GPADC_CHAN_IBUS:
+		*val = da9150_gpadc_ibus_current_avg(raw_val);
+		break;
+	case DA9150_GPADC_CHAN_VBUS:
+		*val = da9150_gpadc_vbus_21v_voltage_now(raw_val);
+		break;
+	case DA9150_GPADC_CHAN_VSYS:
+		*val = da9150_gpadc_vsys_6v_voltage_now(raw_val);
+		break;
+	default:
+		/* No processing for other channels so return raw value */
+		*val = raw_val;
+		break;
+	}
+
+	return IIO_VAL_INT;
+}
+
+static int da9150_gpadc_read_scale(int channel, int *val, int *val2)
+{
+	switch (channel) {
+	case DA9150_GPADC_CHAN_VBAT:
+		*val = 2932;
+		*val2 = 1000;
+		return IIO_VAL_FRACTIONAL;
+	case DA9150_GPADC_CHAN_TJUNC_CORE:
+	case DA9150_GPADC_CHAN_TJUNC_OVP:
+		*val = 1000000;
+		*val2 = 4420;
+		return IIO_VAL_FRACTIONAL;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int da9150_gpadc_read_offset(int channel, int *val)
+{
+	switch (channel) {
+	case DA9150_GPADC_CHAN_VBAT:
+		*val = 1500000 / 2932;
+		return IIO_VAL_INT;
+	case DA9150_GPADC_CHAN_TJUNC_CORE:
+	case DA9150_GPADC_CHAN_TJUNC_OVP:
+		*val = -144;
+		return IIO_VAL_INT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int da9150_gpadc_read_raw(struct iio_dev *indio_dev,
+				 struct iio_chan_spec const *chan,
+				 int *val, int *val2, long mask)
+{
+	struct da9150_gpadc *gpadc = iio_priv(indio_dev);
+
+	if ((chan->channel < DA9150_GPADC_CHAN_GPIOA) ||
+	    (chan->channel > DA9150_GPADC_CHAN_TJUNC_OVP))
+		return -EINVAL;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+	case IIO_CHAN_INFO_PROCESSED:
+		return da9150_gpadc_read_processed(gpadc, chan->channel,
+						   chan->address, val);
+	case IIO_CHAN_INFO_SCALE:
+		return da9150_gpadc_read_scale(chan->channel, val, val2);
+	case IIO_CHAN_INFO_OFFSET:
+		return da9150_gpadc_read_offset(chan->channel, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct iio_info da9150_gpadc_info = {
+	.read_raw = &da9150_gpadc_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+#define DA9150_GPADC_CHANNEL(_id, _hw_id, _type, chan_info,	\
+			     _ext_name) {			\
+	.type = _type,						\
+	.indexed = 1,						\
+	.channel = DA9150_GPADC_CHAN_##_id,			\
+	.address = DA9150_GPADC_HW_CHAN_##_hw_id,		\
+	.info_mask_separate = chan_info,			\
+	.extend_name = _ext_name,				\
+	.datasheet_name = #_id,					\
+}
+
+#define DA9150_GPADC_CHANNEL_RAW(_id, _hw_id, _type, _ext_name)	\
+	DA9150_GPADC_CHANNEL(_id, _hw_id, _type,		\
+			     BIT(IIO_CHAN_INFO_RAW), _ext_name)
+
+#define DA9150_GPADC_CHANNEL_SCALED(_id, _hw_id, _type, _ext_name)	\
+	DA9150_GPADC_CHANNEL(_id, _hw_id, _type,			\
+			     BIT(IIO_CHAN_INFO_RAW) |			\
+			     BIT(IIO_CHAN_INFO_SCALE) |			\
+			     BIT(IIO_CHAN_INFO_OFFSET),			\
+			     _ext_name)
+
+#define DA9150_GPADC_CHANNEL_PROCESSED(_id, _hw_id, _type, _ext_name)	\
+	DA9150_GPADC_CHANNEL(_id, _hw_id, _type,			\
+			     BIT(IIO_CHAN_INFO_PROCESSED), _ext_name)
+
+/* Supported channels */
+static const struct iio_chan_spec da9150_gpadc_channels[] = {
+	DA9150_GPADC_CHANNEL_PROCESSED(GPIOA, GPIOA_6V, IIO_VOLTAGE, NULL),
+	DA9150_GPADC_CHANNEL_PROCESSED(GPIOB, GPIOB_6V, IIO_VOLTAGE, NULL),
+	DA9150_GPADC_CHANNEL_PROCESSED(GPIOC, GPIOC_6V, IIO_VOLTAGE, NULL),
+	DA9150_GPADC_CHANNEL_PROCESSED(GPIOD, GPIOD_6V, IIO_VOLTAGE, NULL),
+	DA9150_GPADC_CHANNEL_PROCESSED(IBUS, IBUS_SENSE, IIO_CURRENT, "ibus"),
+	DA9150_GPADC_CHANNEL_PROCESSED(VBUS, VBUS_DIV_, IIO_VOLTAGE, "vbus"),
+	DA9150_GPADC_CHANNEL_PROCESSED(VSYS, VSYS, IIO_VOLTAGE, "vsys"),
+	DA9150_GPADC_CHANNEL_SCALED(VBAT, VBAT, IIO_VOLTAGE, "vbat"),
+	DA9150_GPADC_CHANNEL_RAW(TBAT, TBAT, IIO_VOLTAGE, "tbat"),
+	DA9150_GPADC_CHANNEL_SCALED(TJUNC_CORE, TJUNC_CORE, IIO_TEMP,
+				    "tjunc_core"),
+	DA9150_GPADC_CHANNEL_SCALED(TJUNC_OVP, TJUNC_OVP, IIO_TEMP,
+				    "tjunc_ovp"),
+};
+
+/* Default maps used by da9150-charger */
+static struct iio_map da9150_gpadc_default_maps[] = {
+	{
+		.consumer_dev_name = "da9150-charger",
+		.consumer_channel = "CHAN_IBUS",
+		.adc_channel_label = "IBUS",
+	},
+	{
+		.consumer_dev_name = "da9150-charger",
+		.consumer_channel = "CHAN_VBUS",
+		.adc_channel_label = "VBUS",
+	},
+	{
+		.consumer_dev_name = "da9150-charger",
+		.consumer_channel = "CHAN_TJUNC",
+		.adc_channel_label = "TJUNC_CORE",
+	},
+	{
+		.consumer_dev_name = "da9150-charger",
+		.consumer_channel = "CHAN_VBAT",
+		.adc_channel_label = "VBAT",
+	},
+	{},
+};
+
+static int da9150_gpadc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct da9150 *da9150 = dev_get_drvdata(dev->parent);
+	struct da9150_gpadc *gpadc;
+	struct iio_dev *indio_dev;
+	int irq, ret;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*gpadc));
+	if (!indio_dev) {
+		dev_err(&pdev->dev, "Failed to allocate IIO device\n");
+		return -ENOMEM;
+	}
+	gpadc = iio_priv(indio_dev);
+
+	platform_set_drvdata(pdev, indio_dev);
+	gpadc->da9150 = da9150;
+	gpadc->dev = dev;
+	mutex_init(&gpadc->lock);
+	init_completion(&gpadc->complete);
+
+	irq = platform_get_irq_byname(pdev, "GPADC");
+	if (irq < 0) {
+		dev_err(dev, "Failed to get IRQ: %d\n", irq);
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, NULL, da9150_gpadc_irq,
+					IRQF_ONESHOT, "GPADC", gpadc);
+	if (ret) {
+		dev_err(dev, "Failed to request IRQ %d: %d\n", irq, ret);
+		return ret;
+	}
+
+	ret = iio_map_array_register(indio_dev, da9150_gpadc_default_maps);
+	if (ret) {
+		dev_err(dev, "Failed to register IIO maps: %d\n", ret);
+		return ret;
+	}
+
+	indio_dev->name = dev_name(dev);
+	indio_dev->dev.parent = dev;
+	indio_dev->dev.of_node = pdev->dev.of_node;
+	indio_dev->info = &da9150_gpadc_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = da9150_gpadc_channels;
+	indio_dev->num_channels = ARRAY_SIZE(da9150_gpadc_channels);
+
+	ret = iio_device_register(indio_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register IIO device: %d\n", ret);
+		goto iio_map_unreg;
+	}
+
+	return 0;
+
+iio_map_unreg:
+	iio_map_array_unregister(indio_dev);
+
+	return ret;
+}
+
+static int da9150_gpadc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	iio_map_array_unregister(indio_dev);
+
+	return 0;
+}
+
+static struct platform_driver da9150_gpadc_driver = {
+	.driver = {
+		.name = "da9150-gpadc",
+	},
+	.probe = da9150_gpadc_probe,
+	.remove = da9150_gpadc_remove,
+};
+
+module_platform_driver(da9150_gpadc_driver);
+
+MODULE_DESCRIPTION("GPADC Driver for DA9150");
+MODULE_AUTHOR("Adam Thomson <Adam.Thomson.Opensource@diasemi.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index af1dc6a..43429ab 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -133,19 +133,25 @@
 	return ops;
 }
 
-struct iommu_ops *of_iommu_configure(struct device *dev)
+struct iommu_ops *of_iommu_configure(struct device *dev,
+				     struct device_node *master_np)
 {
 	struct of_phandle_args iommu_spec;
 	struct device_node *np;
 	struct iommu_ops *ops = NULL;
 	int idx = 0;
 
+	if (dev_is_pci(dev)) {
+		dev_err(dev, "IOMMU is currently not supported for PCI\n");
+		return NULL;
+	}
+
 	/*
 	 * We don't currently walk up the tree looking for a parent IOMMU.
 	 * See the `Notes:' section of
 	 * Documentation/devicetree/bindings/iommu/iommu.txt
 	 */
-	while (!of_parse_phandle_with_args(dev->of_node, "iommus",
+	while (!of_parse_phandle_with_args(master_np, "iommus",
 					   "#iommu-cells", idx,
 					   &iommu_spec)) {
 		np = iommu_spec.np;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index cc79d2a..c8d260e 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -110,6 +110,13 @@
 	bool
 	select IRQ_DOMAIN
 
+config ST_IRQCHIP
+	bool
+	select REGMAP
+	select MFD_SYSCON
+	help
+	  Enables SysCfg Controlled IRQs on STi based platforms.
+
 config TB10X_IRQC
 	bool
 	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 42965d2..552a740 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -6,6 +6,7 @@
 obj-$(CONFIG_ARCH_MMP)			+= irq-mmp.o
 obj-$(CONFIG_ARCH_MVEBU)		+= irq-armada-370-xp.o
 obj-$(CONFIG_ARCH_MXS)			+= irq-mxs.o
+obj-$(CONFIG_ARCH_TEGRA)		+= irq-tegra.o
 obj-$(CONFIG_ARCH_S3C24XX)		+= irq-s3c24xx.o
 obj-$(CONFIG_DW_APB_ICTL)		+= irq-dw-apb-ictl.o
 obj-$(CONFIG_METAG)			+= irq-metag-ext.o
@@ -33,10 +34,12 @@
 obj-$(CONFIG_VERSATILE_FPGA_IRQ)	+= irq-versatile-fpga.o
 obj-$(CONFIG_ARCH_NSPIRE)		+= irq-zevio.o
 obj-$(CONFIG_ARCH_VT8500)		+= irq-vt8500.o
+obj-$(CONFIG_ST_IRQCHIP)		+= irq-st.o
 obj-$(CONFIG_TB10X_IRQC)		+= irq-tb10x.o
 obj-$(CONFIG_XTENSA)			+= irq-xtensa-pic.o
 obj-$(CONFIG_XTENSA_MX)			+= irq-xtensa-mx.o
 obj-$(CONFIG_IRQ_CROSSBAR)		+= irq-crossbar.o
+obj-$(CONFIG_SOC_VF610)			+= irq-vf610-mscm-ir.o
 obj-$(CONFIG_BCM7120_L2_IRQ)		+= irq-bcm7120-l2.o
 obj-$(CONFIG_BRCMSTB_L2_IRQ)		+= irq-brcmstb-l2.o
 obj-$(CONFIG_KEYSTONE_IRQ)		+= irq-keystone.o
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 4387dae..daccc8b 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -38,6 +38,8 @@
 /* Interrupt Controller Registers Map */
 #define ARMADA_370_XP_INT_SET_MASK_OFFS		(0x48)
 #define ARMADA_370_XP_INT_CLEAR_MASK_OFFS	(0x4C)
+#define ARMADA_370_XP_INT_FABRIC_MASK_OFFS	(0x54)
+#define ARMADA_370_XP_INT_CAUSE_PERF(cpu)	(1 << cpu)
 
 #define ARMADA_370_XP_INT_CONTROL		(0x00)
 #define ARMADA_370_XP_INT_SET_ENABLE_OFFS	(0x30)
@@ -56,6 +58,7 @@
 #define ARMADA_370_XP_MAX_PER_CPU_IRQS		(28)
 
 #define ARMADA_370_XP_TIMER0_PER_CPU_IRQ	(5)
+#define ARMADA_370_XP_FABRIC_IRQ		(3)
 
 #define IPI_DOORBELL_START                      (0)
 #define IPI_DOORBELL_END                        (8)
@@ -77,6 +80,17 @@
 static phys_addr_t msi_doorbell_addr;
 #endif
 
+static inline bool is_percpu_irq(irq_hw_number_t irq)
+{
+	switch (irq) {
+	case ARMADA_370_XP_TIMER0_PER_CPU_IRQ:
+	case ARMADA_370_XP_FABRIC_IRQ:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * In SMP mode:
  * For shared global interrupts, mask/unmask global enable bit
@@ -86,7 +100,7 @@
 {
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
-	if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ)
+	if (!is_percpu_irq(hwirq))
 		writel(hwirq, main_int_base +
 				ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS);
 	else
@@ -98,7 +112,7 @@
 {
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 
-	if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ)
+	if (!is_percpu_irq(hwirq))
 		writel(hwirq, main_int_base +
 				ARMADA_370_XP_INT_SET_ENABLE_OFFS);
 	else
@@ -281,20 +295,21 @@
 #ifdef CONFIG_SMP
 	.irq_set_affinity = armada_xp_set_affinity,
 #endif
+	.flags		= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND,
 };
 
 static int armada_370_xp_mpic_irq_map(struct irq_domain *h,
 				      unsigned int virq, irq_hw_number_t hw)
 {
 	armada_370_xp_irq_mask(irq_get_irq_data(virq));
-	if (hw != ARMADA_370_XP_TIMER0_PER_CPU_IRQ)
+	if (!is_percpu_irq(hw))
 		writel(hw, per_cpu_int_base +
 			ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
 	else
 		writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS);
 	irq_set_status_flags(virq, IRQ_LEVEL);
 
-	if (hw == ARMADA_370_XP_TIMER0_PER_CPU_IRQ) {
+	if (is_percpu_irq(hw)) {
 		irq_set_percpu_devid(virq);
 		irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
 					handle_percpu_devid_irq);
@@ -308,28 +323,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_SMP
-static void armada_mpic_send_doorbell(const struct cpumask *mask,
-				      unsigned int irq)
-{
-	int cpu;
-	unsigned long map = 0;
-
-	/* Convert our logical CPU mask into a physical one. */
-	for_each_cpu(cpu, mask)
-		map |= 1 << cpu_logical_map(cpu);
-
-	/*
-	 * Ensure that stores to Normal memory are visible to the
-	 * other CPUs before issuing the IPI.
-	 */
-	dsb();
-
-	/* submit softirq */
-	writel((map << 8) | irq, main_int_base +
-		ARMADA_370_XP_SW_TRIG_INT_OFFS);
-}
-
 static void armada_xp_mpic_smp_cpu_init(void)
 {
 	u32 control;
@@ -352,11 +345,44 @@
 	writel(0, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
 }
 
+static void armada_xp_mpic_perf_init(void)
+{
+	unsigned long cpuid = cpu_logical_map(smp_processor_id());
+
+	/* Enable Performance Counter Overflow interrupts */
+	writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid),
+	       per_cpu_int_base + ARMADA_370_XP_INT_FABRIC_MASK_OFFS);
+}
+
+#ifdef CONFIG_SMP
+static void armada_mpic_send_doorbell(const struct cpumask *mask,
+				      unsigned int irq)
+{
+	int cpu;
+	unsigned long map = 0;
+
+	/* Convert our logical CPU mask into a physical one. */
+	for_each_cpu(cpu, mask)
+		map |= 1 << cpu_logical_map(cpu);
+
+	/*
+	 * Ensure that stores to Normal memory are visible to the
+	 * other CPUs before issuing the IPI.
+	 */
+	dsb();
+
+	/* submit softirq */
+	writel((map << 8) | irq, main_int_base +
+		ARMADA_370_XP_SW_TRIG_INT_OFFS);
+}
+
 static int armada_xp_mpic_secondary_init(struct notifier_block *nfb,
 					 unsigned long action, void *hcpu)
 {
-	if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+	if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
+		armada_xp_mpic_perf_init();
 		armada_xp_mpic_smp_cpu_init();
+	}
 
 	return NOTIFY_OK;
 }
@@ -369,8 +395,10 @@
 static int mpic_cascaded_secondary_init(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
-	if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+	if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
+		armada_xp_mpic_perf_init();
 		enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
+	}
 
 	return NOTIFY_OK;
 }
@@ -379,7 +407,6 @@
 	.notifier_call = mpic_cascaded_secondary_init,
 	.priority = 100,
 };
-
 #endif /* CONFIG_SMP */
 
 static struct irq_domain_ops armada_370_xp_mpic_irq_ops = {
@@ -588,9 +615,9 @@
 
 	BUG_ON(!armada_370_xp_mpic_domain);
 
-#ifdef CONFIG_SMP
+	/* Setup for the boot CPU */
+	armada_xp_mpic_perf_init();
 	armada_xp_mpic_smp_cpu_init();
-#endif
 
 	armada_370_xp_msi_init(node, main_int_res.start);
 
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index bbbaf5d..692fe2b 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -11,11 +11,12 @@
  */
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/irqdomain.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/irqchip/irq-crossbar.h>
+
+#include "irqchip.h"
 
 #define IRQ_FREE	-1
 #define IRQ_RESERVED	-2
@@ -24,6 +25,7 @@
 
 /**
  * struct crossbar_device - crossbar device description
+ * @lock: spinlock serializing access to @irq_map
  * @int_max: maximum number of supported interrupts
  * @safe_map: safe default value to initialize the crossbar
  * @max_crossbar_sources: Maximum number of crossbar sources
@@ -33,6 +35,7 @@
  * @write: register write function pointer
  */
 struct crossbar_device {
+	raw_spinlock_t lock;
 	uint int_max;
 	uint safe_map;
 	uint max_crossbar_sources;
@@ -44,72 +47,101 @@
 
 static struct crossbar_device *cb;
 
-static inline void crossbar_writel(int irq_no, int cb_no)
+static void crossbar_writel(int irq_no, int cb_no)
 {
 	writel(cb_no, cb->crossbar_base + cb->register_offsets[irq_no]);
 }
 
-static inline void crossbar_writew(int irq_no, int cb_no)
+static void crossbar_writew(int irq_no, int cb_no)
 {
 	writew(cb_no, cb->crossbar_base + cb->register_offsets[irq_no]);
 }
 
-static inline void crossbar_writeb(int irq_no, int cb_no)
+static void crossbar_writeb(int irq_no, int cb_no)
 {
 	writeb(cb_no, cb->crossbar_base + cb->register_offsets[irq_no]);
 }
 
-static inline int get_prev_map_irq(int cb_no)
+static struct irq_chip crossbar_chip = {
+	.name			= "CBAR",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_wake		= irq_chip_set_wake_parent,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int allocate_gic_irq(struct irq_domain *domain, unsigned virq,
+			    irq_hw_number_t hwirq)
 {
+	struct of_phandle_args args;
 	int i;
+	int err;
 
-	for (i = cb->int_max - 1; i >= 0; i--)
-		if (cb->irq_map[i] == cb_no)
-			return i;
-
-	return -ENODEV;
-}
-
-static inline int allocate_free_irq(int cb_no)
-{
-	int i;
-
+	raw_spin_lock(&cb->lock);
 	for (i = cb->int_max - 1; i >= 0; i--) {
 		if (cb->irq_map[i] == IRQ_FREE) {
-			cb->irq_map[i] = cb_no;
-			return i;
+			cb->irq_map[i] = hwirq;
+			break;
 		}
 	}
+	raw_spin_unlock(&cb->lock);
 
-	return -ENODEV;
+	if (i < 0)
+		return -ENODEV;
+
+	args.np = domain->parent->of_node;
+	args.args_count = 3;
+	args.args[0] = 0;	/* SPI */
+	args.args[1] = i;
+	args.args[2] = IRQ_TYPE_LEVEL_HIGH;
+
+	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &args);
+	if (err)
+		cb->irq_map[i] = IRQ_FREE;
+	else
+		cb->write(i, hwirq);
+
+	return err;
 }
 
-static inline bool needs_crossbar_write(irq_hw_number_t hw)
+static int crossbar_domain_alloc(struct irq_domain *d, unsigned int virq,
+				 unsigned int nr_irqs, void *data)
 {
-	int cb_no;
+	struct of_phandle_args *args = data;
+	irq_hw_number_t hwirq;
+	int i;
 
-	if (hw > GIC_IRQ_START) {
-		cb_no = cb->irq_map[hw - GIC_IRQ_START];
-		if (cb_no != IRQ_RESERVED && cb_no != IRQ_SKIP)
-			return true;
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+	if ((hwirq + nr_irqs) > cb->max_crossbar_sources)
+		return -EINVAL;	/* Can't deal with this */
+
+	for (i = 0; i < nr_irqs; i++) {
+		int err = allocate_gic_irq(d, virq + i, hwirq + i);
+
+		if (err)
+			return err;
+
+		irq_domain_set_hwirq_and_chip(d, virq + i, hwirq + i,
+					      &crossbar_chip, NULL);
 	}
 
-	return false;
-}
-
-static int crossbar_domain_map(struct irq_domain *d, unsigned int irq,
-			       irq_hw_number_t hw)
-{
-	if (needs_crossbar_write(hw))
-		cb->write(hw - GIC_IRQ_START, cb->irq_map[hw - GIC_IRQ_START]);
-
 	return 0;
 }
 
 /**
- * crossbar_domain_unmap - unmap a crossbar<->irq connection
- * @d: domain of irq to unmap
- * @irq: virq number
+ * crossbar_domain_free - unmap/free a crossbar<->irq connection
+ * @domain: domain of irq to unmap
+ * @virq: virq number
+ * @nr_irqs: number of irqs to free
  *
  * We do not maintain a use count of total number of map/unmap
  * calls for a particular irq to find out if a irq can be really
@@ -117,14 +149,20 @@
  * after which irq is anyways unusable. So an explicit map has to be called
  * after that.
  */
-static void crossbar_domain_unmap(struct irq_domain *d, unsigned int irq)
+static void crossbar_domain_free(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs)
 {
-	irq_hw_number_t hw = irq_get_irq_data(irq)->hwirq;
+	int i;
 
-	if (needs_crossbar_write(hw)) {
-		cb->irq_map[hw - GIC_IRQ_START] = IRQ_FREE;
-		cb->write(hw - GIC_IRQ_START, cb->safe_map);
+	raw_spin_lock(&cb->lock);
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+
+		irq_domain_reset_irq_data(d);
+		cb->irq_map[d->hwirq] = IRQ_FREE;
+		cb->write(d->hwirq, cb->safe_map);
 	}
+	raw_spin_unlock(&cb->lock);
 }
 
 static int crossbar_domain_xlate(struct irq_domain *d,
@@ -133,44 +171,22 @@
 				 unsigned long *out_hwirq,
 				 unsigned int *out_type)
 {
-	int ret;
-	int req_num = intspec[1];
-	int direct_map_num;
+	if (d->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != 0)
+		return -EINVAL;	/* No PPI should point to this domain */
 
-	if (req_num >= cb->max_crossbar_sources) {
-		direct_map_num = req_num - cb->max_crossbar_sources;
-		if (direct_map_num < cb->int_max) {
-			ret = cb->irq_map[direct_map_num];
-			if (ret == IRQ_RESERVED || ret == IRQ_SKIP) {
-				/* We use the interrupt num as h/w irq num */
-				ret = direct_map_num;
-				goto found;
-			}
-		}
-
-		pr_err("%s: requested crossbar number %d > max %d\n",
-		       __func__, req_num, cb->max_crossbar_sources);
-		return -EINVAL;
-	}
-
-	ret = get_prev_map_irq(req_num);
-	if (ret >= 0)
-		goto found;
-
-	ret = allocate_free_irq(req_num);
-
-	if (ret < 0)
-		return ret;
-
-found:
-	*out_hwirq = ret + GIC_IRQ_START;
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
 	return 0;
 }
 
-static const struct irq_domain_ops routable_irq_domain_ops = {
-	.map = crossbar_domain_map,
-	.unmap = crossbar_domain_unmap,
-	.xlate = crossbar_domain_xlate
+static const struct irq_domain_ops crossbar_domain_ops = {
+	.alloc	= crossbar_domain_alloc,
+	.free	= crossbar_domain_free,
+	.xlate	= crossbar_domain_xlate,
 };
 
 static int __init crossbar_of_init(struct device_node *node)
@@ -293,7 +309,8 @@
 		cb->write(i, cb->safe_map);
 	}
 
-	register_routable_domain_ops(&routable_irq_domain_ops);
+	raw_spin_lock_init(&cb->lock);
+
 	return 0;
 
 err_reg_offset:
@@ -309,18 +326,37 @@
 	return ret;
 }
 
-static const struct of_device_id crossbar_match[] __initconst = {
-	{ .compatible = "ti,irq-crossbar" },
-	{}
-};
-
-int __init irqcrossbar_init(void)
+static int __init irqcrossbar_init(struct device_node *node,
+				   struct device_node *parent)
 {
-	struct device_node *np;
-	np = of_find_matching_node(NULL, crossbar_match);
-	if (!np)
-		return -ENODEV;
+	struct irq_domain *parent_domain, *domain;
+	int err;
 
-	crossbar_of_init(np);
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
+
+	err = crossbar_of_init(node);
+	if (err)
+		return err;
+
+	domain = irq_domain_add_hierarchy(parent_domain, 0,
+					  cb->max_crossbar_sources,
+					  node, &crossbar_domain_ops,
+					  NULL);
+	if (!domain) {
+		pr_err("%s: failed to allocated domain\n", node->full_name);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
+
+IRQCHIP_DECLARE(ti_irqcrossbar, "ti,irq-crossbar", irqcrossbar_init);
diff --git a/drivers/irqchip/irq-digicolor.c b/drivers/irqchip/irq-digicolor.c
index 930a2a2..3cbc658 100644
--- a/drivers/irqchip/irq-digicolor.c
+++ b/drivers/irqchip/irq-digicolor.c
@@ -55,8 +55,8 @@
 	} while (1);
 }
 
-static void digicolor_set_gc(void __iomem *reg_base, unsigned irq_base,
-			     unsigned en_reg, unsigned ack_reg)
+static void __init digicolor_set_gc(void __iomem *reg_base, unsigned irq_base,
+				    unsigned en_reg, unsigned ack_reg)
 {
 	struct irq_chip_generic *gc;
 
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index fd8850d..4f2fb62 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -195,6 +195,19 @@
 /*
  * Routines to disable, enable, EOI and route interrupts
  */
+static int gic_peek_irq(struct irq_data *d, u32 offset)
+{
+	u32 mask = 1 << (gic_irq(d) % 32);
+	void __iomem *base;
+
+	if (gic_irq_in_rdist(d))
+		base = gic_data_rdist_sgi_base();
+	else
+		base = gic_data.dist_base;
+
+	return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask);
+}
+
 static void gic_poke_irq(struct irq_data *d, u32 offset)
 {
 	u32 mask = 1 << (gic_irq(d) % 32);
@@ -223,6 +236,61 @@
 	gic_poke_irq(d, GICD_ISENABLER);
 }
 
+static int gic_irq_set_irqchip_state(struct irq_data *d,
+				     enum irqchip_irq_state which, bool val)
+{
+	u32 reg;
+
+	if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */
+		return -EINVAL;
+
+	switch (which) {
+	case IRQCHIP_STATE_PENDING:
+		reg = val ? GICD_ISPENDR : GICD_ICPENDR;
+		break;
+
+	case IRQCHIP_STATE_ACTIVE:
+		reg = val ? GICD_ISACTIVER : GICD_ICACTIVER;
+		break;
+
+	case IRQCHIP_STATE_MASKED:
+		reg = val ? GICD_ICENABLER : GICD_ISENABLER;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	gic_poke_irq(d, reg);
+	return 0;
+}
+
+static int gic_irq_get_irqchip_state(struct irq_data *d,
+				     enum irqchip_irq_state which, bool *val)
+{
+	if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */
+		return -EINVAL;
+
+	switch (which) {
+	case IRQCHIP_STATE_PENDING:
+		*val = gic_peek_irq(d, GICD_ISPENDR);
+		break;
+
+	case IRQCHIP_STATE_ACTIVE:
+		*val = gic_peek_irq(d, GICD_ISACTIVER);
+		break;
+
+	case IRQCHIP_STATE_MASKED:
+		*val = !gic_peek_irq(d, GICD_ISENABLER);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void gic_eoi_irq(struct irq_data *d)
 {
 	gic_write_eoir(gic_irq(d));
@@ -418,19 +486,6 @@
 }
 
 #ifdef CONFIG_SMP
-static int gic_peek_irq(struct irq_data *d, u32 offset)
-{
-	u32 mask = 1 << (gic_irq(d) % 32);
-	void __iomem *base;
-
-	if (gic_irq_in_rdist(d))
-		base = gic_data_rdist_sgi_base();
-	else
-		base = gic_data.dist_base;
-
-	return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask);
-}
-
 static int gic_secondary_init(struct notifier_block *nfb,
 			      unsigned long action, void *hcpu)
 {
@@ -601,6 +656,8 @@
 	.irq_eoi		= gic_eoi_irq,
 	.irq_set_type		= gic_set_type,
 	.irq_set_affinity	= gic_set_affinity,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
 };
 
 #define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 471e1cdc1..a6ce347 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -151,13 +151,24 @@
 /*
  * Routines to acknowledge, disable and enable interrupts
  */
-static void gic_mask_irq(struct irq_data *d)
+static void gic_poke_irq(struct irq_data *d, u32 offset)
 {
 	u32 mask = 1 << (gic_irq(d) % 32);
+	writel_relaxed(mask, gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4);
+}
+
+static int gic_peek_irq(struct irq_data *d, u32 offset)
+{
+	u32 mask = 1 << (gic_irq(d) % 32);
+	return !!(readl_relaxed(gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4) & mask);
+}
+
+static void gic_mask_irq(struct irq_data *d)
+{
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&irq_controller_lock, flags);
-	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
+	gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR);
 	if (gic_arch_extn.irq_mask)
 		gic_arch_extn.irq_mask(d);
 	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
@@ -165,13 +176,12 @@
 
 static void gic_unmask_irq(struct irq_data *d)
 {
-	u32 mask = 1 << (gic_irq(d) % 32);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 	if (gic_arch_extn.irq_unmask)
 		gic_arch_extn.irq_unmask(d);
-	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
+	gic_poke_irq(d, GIC_DIST_ENABLE_SET);
 	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
@@ -186,6 +196,55 @@
 	writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
 }
 
+static int gic_irq_set_irqchip_state(struct irq_data *d,
+				     enum irqchip_irq_state which, bool val)
+{
+	u32 reg;
+
+	switch (which) {
+	case IRQCHIP_STATE_PENDING:
+		reg = val ? GIC_DIST_PENDING_SET : GIC_DIST_PENDING_CLEAR;
+		break;
+
+	case IRQCHIP_STATE_ACTIVE:
+		reg = val ? GIC_DIST_ACTIVE_SET : GIC_DIST_ACTIVE_CLEAR;
+		break;
+
+	case IRQCHIP_STATE_MASKED:
+		reg = val ? GIC_DIST_ENABLE_CLEAR : GIC_DIST_ENABLE_SET;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	gic_poke_irq(d, reg);
+	return 0;
+}
+
+static int gic_irq_get_irqchip_state(struct irq_data *d,
+				      enum irqchip_irq_state which, bool *val)
+{
+	switch (which) {
+	case IRQCHIP_STATE_PENDING:
+		*val = gic_peek_irq(d, GIC_DIST_PENDING_SET);
+		break;
+
+	case IRQCHIP_STATE_ACTIVE:
+		*val = gic_peek_irq(d, GIC_DIST_ACTIVE_SET);
+		break;
+
+	case IRQCHIP_STATE_MASKED:
+		*val = !gic_peek_irq(d, GIC_DIST_ENABLE_SET);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int gic_set_type(struct irq_data *d, unsigned int type)
 {
 	void __iomem *base = gic_dist_base(d);
@@ -329,6 +388,8 @@
 	.irq_set_affinity	= gic_set_affinity,
 #endif
 	.irq_set_wake		= gic_set_wake,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
 };
 
 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
@@ -353,7 +414,7 @@
 			break;
 	}
 
-	if (!mask)
+	if (!mask && num_possible_cpus() > 1)
 		pr_crit("GIC CPU mask not found - kernel will fail to boot.\n");
 
 	return mask;
@@ -802,15 +863,12 @@
 		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-
-		gic_routable_irq_domain_ops->map(d, irq, hw);
 	}
 	return 0;
 }
 
 static void gic_irq_domain_unmap(struct irq_domain *d, unsigned int irq)
 {
-	gic_routable_irq_domain_ops->unmap(d, irq);
 }
 
 static int gic_irq_domain_xlate(struct irq_domain *d,
@@ -829,16 +887,8 @@
 	*out_hwirq = intspec[1] + 16;
 
 	/* For SPIs, we need to add 16 more to get the GIC irq ID number */
-	if (!intspec[0]) {
-		ret = gic_routable_irq_domain_ops->xlate(d, controller,
-							 intspec,
-							 intsize,
-							 out_hwirq,
-							 out_type);
-
-		if (IS_ERR_VALUE(ret))
-			return ret;
-	}
+	if (!intspec[0])
+		*out_hwirq += 16;
 
 	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
 
@@ -895,37 +945,11 @@
 	.xlate = gic_irq_domain_xlate,
 };
 
-/* Default functions for routable irq domain */
-static int gic_routable_irq_domain_map(struct irq_domain *d, unsigned int irq,
-			      irq_hw_number_t hw)
+void gic_set_irqchip_flags(unsigned long flags)
 {
-	return 0;
+	gic_chip.flags |= flags;
 }
 
-static void gic_routable_irq_domain_unmap(struct irq_domain *d,
-					  unsigned int irq)
-{
-}
-
-static int gic_routable_irq_domain_xlate(struct irq_domain *d,
-				struct device_node *controller,
-				const u32 *intspec, unsigned int intsize,
-				unsigned long *out_hwirq,
-				unsigned int *out_type)
-{
-	*out_hwirq += 16;
-	return 0;
-}
-
-static const struct irq_domain_ops gic_default_routable_irq_domain_ops = {
-	.map = gic_routable_irq_domain_map,
-	.unmap = gic_routable_irq_domain_unmap,
-	.xlate = gic_routable_irq_domain_xlate,
-};
-
-const struct irq_domain_ops *gic_routable_irq_domain_ops =
-					&gic_default_routable_irq_domain_ops;
-
 void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 			   void __iomem *dist_base, void __iomem *cpu_base,
 			   u32 percpu_offset, struct device_node *node)
@@ -933,7 +957,6 @@
 	irq_hw_number_t hwirq_base;
 	struct gic_chip_data *gic;
 	int gic_irqs, irq_base, i;
-	int nr_routable_irqs;
 
 	BUG_ON(gic_nr >= MAX_GIC_NR);
 
@@ -989,15 +1012,9 @@
 	gic->gic_irqs = gic_irqs;
 
 	if (node) {		/* DT case */
-		const struct irq_domain_ops *ops = &gic_irq_domain_hierarchy_ops;
-
-		if (!of_property_read_u32(node, "arm,routable-irqs",
-					  &nr_routable_irqs)) {
-			ops = &gic_irq_domain_ops;
-			gic_irqs = nr_routable_irqs;
-		}
-
-		gic->domain = irq_domain_add_linear(node, gic_irqs, ops, gic);
+		gic->domain = irq_domain_add_linear(node, gic_irqs,
+						    &gic_irq_domain_hierarchy_ops,
+						    gic);
 	} else {		/* Non-DT case */
 		/*
 		 * For primary GICs, skip over SGIs.
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 9acdc08..f2d269b 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -166,6 +166,27 @@
 
 	return (((cycle_t) hi) << 32) + lo;
 }
+
+void gic_start_count(void)
+{
+	u32 gicconfig;
+
+	/* Start the counter */
+	gicconfig = gic_read(GIC_REG(SHARED, GIC_SH_CONFIG));
+	gicconfig &= ~(1 << GIC_SH_CONFIG_COUNTSTOP_SHF);
+	gic_write(GIC_REG(SHARED, GIC_SH_CONFIG), gicconfig);
+}
+
+void gic_stop_count(void)
+{
+	u32 gicconfig;
+
+	/* Stop the counter */
+	gicconfig = gic_read(GIC_REG(SHARED, GIC_SH_CONFIG));
+	gicconfig |= 1 << GIC_SH_CONFIG_COUNTSTOP_SHF;
+	gic_write(GIC_REG(SHARED, GIC_SH_CONFIG), gicconfig);
+}
+
 #endif
 
 static bool gic_local_irq_is_routable(int intr)
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 384e6ed..cdf80b7 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -17,6 +17,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/clk.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
@@ -29,15 +30,26 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_data/irq-renesas-irqc.h>
+#include <linux/pm_runtime.h>
 
-#define IRQC_IRQ_MAX 32 /* maximum 32 interrupts per driver instance */
+#define IRQC_IRQ_MAX	32	/* maximum 32 interrupts per driver instance */
 
-#define IRQC_REQ_STS 0x00
-#define IRQC_EN_STS 0x04
-#define IRQC_EN_SET 0x08
+#define IRQC_REQ_STS	0x00	/* Interrupt Request Status Register */
+#define IRQC_EN_STS	0x04	/* Interrupt Enable Status Register */
+#define IRQC_EN_SET	0x08	/* Interrupt Enable Set Register */
 #define IRQC_INT_CPU_BASE(n) (0x000 + ((n) * 0x10))
-#define DETECT_STATUS 0x100
+				/* SYS-CPU vs. RT-CPU */
+#define DETECT_STATUS	0x100	/* IRQn Detect Status Register */
+#define MONITOR		0x104	/* IRQn Signal Level Monitor Register */
+#define HLVL_STS	0x108	/* IRQn High Level Detect Status Register */
+#define LLVL_STS	0x10c	/* IRQn Low Level Detect Status Register */
+#define S_R_EDGE_STS	0x110	/* IRQn Sync Rising Edge Detect Status Reg. */
+#define S_F_EDGE_STS	0x114	/* IRQn Sync Falling Edge Detect Status Reg. */
+#define A_R_EDGE_STS	0x118	/* IRQn Async Rising Edge Detect Status Reg. */
+#define A_F_EDGE_STS	0x11c	/* IRQn Async Falling Edge Detect Status Reg. */
+#define CHTEN_STS	0x120	/* Chattering Reduction Status Register */
 #define IRQC_CONFIG(n) (0x180 + ((n) * 0x04))
+				/* IRQn Configuration Register */
 
 struct irqc_irq {
 	int hw_irq;
@@ -55,6 +67,7 @@
 	struct platform_device *pdev;
 	struct irq_chip irq_chip;
 	struct irq_domain *irq_domain;
+	struct clk *clk;
 };
 
 static void irqc_dbg(struct irqc_irq *i, char *str)
@@ -94,7 +107,7 @@
 	struct irqc_priv *p = irq_data_get_irq_chip_data(d);
 	int hw_irq = irqd_to_hwirq(d);
 	unsigned char value = irqc_sense[type & IRQ_TYPE_SENSE_MASK];
-	unsigned long tmp;
+	u32 tmp;
 
 	irqc_dbg(&p->irq[hw_irq], "sense");
 
@@ -108,11 +121,26 @@
 	return 0;
 }
 
+static int irqc_irq_set_wake(struct irq_data *d, unsigned int on)
+{
+	struct irqc_priv *p = irq_data_get_irq_chip_data(d);
+
+	if (!p->clk)
+		return 0;
+
+	if (on)
+		clk_enable(p->clk);
+	else
+		clk_disable(p->clk);
+
+	return 0;
+}
+
 static irqreturn_t irqc_irq_handler(int irq, void *dev_id)
 {
 	struct irqc_irq *i = dev_id;
 	struct irqc_priv *p = i->p;
-	unsigned long bit = BIT(i->hw_irq);
+	u32 bit = BIT(i->hw_irq);
 
 	irqc_dbg(i, "demux1");
 
@@ -170,6 +198,15 @@
 	p->pdev = pdev;
 	platform_set_drvdata(pdev, p);
 
+	p->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(p->clk)) {
+		dev_warn(&pdev->dev, "unable to get clock\n");
+		p->clk = NULL;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
 	/* get hold of manadatory IOMEM */
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!io) {
@@ -210,7 +247,8 @@
 	irq_chip->irq_mask = irqc_irq_disable;
 	irq_chip->irq_unmask = irqc_irq_enable;
 	irq_chip->irq_set_type = irqc_irq_set_type;
-	irq_chip->flags	= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND;
+	irq_chip->irq_set_wake = irqc_irq_set_wake;
+	irq_chip->flags	= IRQCHIP_MASK_ON_SUSPEND;
 
 	p->irq_domain = irq_domain_add_simple(pdev->dev.of_node,
 					      p->number_of_irqs,
@@ -250,6 +288,8 @@
 err2:
 	iounmap(p->iomem);
 err1:
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 	kfree(p);
 err0:
 	return ret;
@@ -265,6 +305,8 @@
 
 	irq_domain_remove(p->irq_domain);
 	iounmap(p->iomem);
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 	kfree(p);
 	return 0;
 }
diff --git a/drivers/irqchip/irq-st.c b/drivers/irqchip/irq-st.c
new file mode 100644
index 0000000..9af48a8
--- /dev/null
+++ b/drivers/irqchip/irq-st.c
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (C) 2014 STMicroelectronics – All Rights Reserved
+ *
+ *  Author: Lee Jones <lee.jones@linaro.org>
+ *
+ *  This is a re-write of Christophe Kerello's PMU driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/interrupt-controller/irq-st.h>
+#include <linux/err.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define STIH415_SYSCFG_642		0x0a8
+#define STIH416_SYSCFG_7543		0x87c
+#define STIH407_SYSCFG_5102		0x198
+#define STID127_SYSCFG_734		0x088
+
+#define ST_A9_IRQ_MASK			0x001FFFFF
+#define ST_A9_IRQ_MAX_CHANS		2
+
+#define ST_A9_IRQ_EN_CTI_0		BIT(0)
+#define ST_A9_IRQ_EN_CTI_1		BIT(1)
+#define ST_A9_IRQ_EN_PMU_0		BIT(2)
+#define ST_A9_IRQ_EN_PMU_1		BIT(3)
+#define ST_A9_IRQ_EN_PL310_L2		BIT(4)
+#define ST_A9_IRQ_EN_EXT_0		BIT(5)
+#define ST_A9_IRQ_EN_EXT_1		BIT(6)
+#define ST_A9_IRQ_EN_EXT_2		BIT(7)
+
+#define ST_A9_FIQ_N_SEL(dev, chan)	(dev << (8  + (chan * 3)))
+#define ST_A9_IRQ_N_SEL(dev, chan)	(dev << (14 + (chan * 3)))
+#define ST_A9_EXTIRQ_INV_SEL(dev)	(dev << 20)
+
+struct st_irq_syscfg {
+	struct regmap *regmap;
+	unsigned int syscfg;
+	unsigned int config;
+	bool ext_inverted;
+};
+
+static const struct of_device_id st_irq_syscfg_match[] = {
+	{
+		.compatible = "st,stih415-irq-syscfg",
+		.data = (void *)STIH415_SYSCFG_642,
+	},
+	{
+		.compatible = "st,stih416-irq-syscfg",
+		.data = (void *)STIH416_SYSCFG_7543,
+	},
+	{
+		.compatible = "st,stih407-irq-syscfg",
+		.data = (void *)STIH407_SYSCFG_5102,
+	},
+	{
+		.compatible = "st,stid127-irq-syscfg",
+		.data = (void *)STID127_SYSCFG_734,
+	},
+	{}
+};
+
+static int st_irq_xlate(struct platform_device *pdev,
+			int device, int channel, bool irq)
+{
+	struct st_irq_syscfg *ddata = dev_get_drvdata(&pdev->dev);
+
+	/* Set the device enable bit. */
+	switch (device) {
+	case ST_IRQ_SYSCFG_EXT_0:
+		ddata->config |= ST_A9_IRQ_EN_EXT_0;
+		break;
+	case ST_IRQ_SYSCFG_EXT_1:
+		ddata->config |= ST_A9_IRQ_EN_EXT_1;
+		break;
+	case ST_IRQ_SYSCFG_EXT_2:
+		ddata->config |= ST_A9_IRQ_EN_EXT_2;
+		break;
+	case ST_IRQ_SYSCFG_CTI_0:
+		ddata->config |= ST_A9_IRQ_EN_CTI_0;
+		break;
+	case ST_IRQ_SYSCFG_CTI_1:
+		ddata->config |= ST_A9_IRQ_EN_CTI_1;
+		break;
+	case ST_IRQ_SYSCFG_PMU_0:
+		ddata->config |= ST_A9_IRQ_EN_PMU_0;
+		break;
+	case ST_IRQ_SYSCFG_PMU_1:
+		ddata->config |= ST_A9_IRQ_EN_PMU_1;
+		break;
+	case ST_IRQ_SYSCFG_pl310_L2:
+		ddata->config |= ST_A9_IRQ_EN_PL310_L2;
+		break;
+	case ST_IRQ_SYSCFG_DISABLED:
+		return 0;
+	default:
+		dev_err(&pdev->dev, "Unrecognised device %d\n", device);
+		return -EINVAL;
+	}
+
+	/* Select IRQ/FIQ channel for device. */
+	ddata->config |= irq ?
+		ST_A9_IRQ_N_SEL(device, channel) :
+		ST_A9_FIQ_N_SEL(device, channel);
+
+	return 0;
+}
+
+static int st_irq_syscfg_enable(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct st_irq_syscfg *ddata = dev_get_drvdata(&pdev->dev);
+	int channels, ret, i;
+	u32 device, invert;
+
+	channels = of_property_count_u32_elems(np, "st,irq-device");
+	if (channels != ST_A9_IRQ_MAX_CHANS) {
+		dev_err(&pdev->dev, "st,enable-irq-device must have 2 elems\n");
+		return -EINVAL;
+	}
+
+	channels = of_property_count_u32_elems(np, "st,fiq-device");
+	if (channels != ST_A9_IRQ_MAX_CHANS) {
+		dev_err(&pdev->dev, "st,enable-fiq-device must have 2 elems\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ST_A9_IRQ_MAX_CHANS; i++) {
+		of_property_read_u32_index(np, "st,irq-device", i, &device);
+
+		ret = st_irq_xlate(pdev, device, i, true);
+		if (ret)
+			return ret;
+
+		of_property_read_u32_index(np, "st,fiq-device", i, &device);
+
+		ret = st_irq_xlate(pdev, device, i, false);
+		if (ret)
+			return ret;
+	}
+
+	/* External IRQs may be inverted. */
+	of_property_read_u32(np, "st,invert-ext", &invert);
+	ddata->config |= ST_A9_EXTIRQ_INV_SEL(invert);
+
+	return regmap_update_bits(ddata->regmap, ddata->syscfg,
+				  ST_A9_IRQ_MASK, ddata->config);
+}
+
+static int st_irq_syscfg_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const struct of_device_id *match;
+	struct st_irq_syscfg *ddata;
+
+	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
+	if (!ddata)
+		return -ENOMEM;
+
+	match = of_match_device(st_irq_syscfg_match, &pdev->dev);
+	if (!match)
+		return -ENODEV;
+
+	ddata->syscfg = (unsigned int)match->data;
+
+	ddata->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscfg");
+	if (IS_ERR(ddata->regmap)) {
+		dev_err(&pdev->dev, "syscfg phandle missing\n");
+		return PTR_ERR(ddata->regmap);
+	}
+
+	dev_set_drvdata(&pdev->dev, ddata);
+
+	return st_irq_syscfg_enable(pdev);
+}
+
+static int st_irq_syscfg_resume(struct device *dev)
+{
+	struct st_irq_syscfg *ddata = dev_get_drvdata(dev);
+
+	return regmap_update_bits(ddata->regmap, ddata->syscfg,
+				  ST_A9_IRQ_MASK, ddata->config);
+}
+
+static SIMPLE_DEV_PM_OPS(st_irq_syscfg_pm_ops, NULL, st_irq_syscfg_resume);
+
+static struct platform_driver st_irq_syscfg_driver = {
+	.driver = {
+		.name = "st_irq_syscfg",
+		.pm = &st_irq_syscfg_pm_ops,
+		.of_match_table = st_irq_syscfg_match,
+	},
+	.probe = st_irq_syscfg_probe,
+};
+
+static int __init st_irq_syscfg_init(void)
+{
+	return platform_driver_register(&st_irq_syscfg_driver);
+}
+core_initcall(st_irq_syscfg_init);
diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c
new file mode 100644
index 0000000..51c485d
--- /dev/null
+++ b/drivers/irqchip/irq-tegra.c
@@ -0,0 +1,377 @@
+/*
+ * Driver code for Tegra's Legacy Interrupt Controller
+ *
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Heavily based on the original arch/arm/mach-tegra/irq.c code:
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Author:
+ *	Colin Cross <ccross@android.com>
+ *
+ * Copyright (C) 2010,2013, NVIDIA Corporation
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#include "irqchip.h"
+
+#define ICTLR_CPU_IEP_VFIQ	0x08
+#define ICTLR_CPU_IEP_FIR	0x14
+#define ICTLR_CPU_IEP_FIR_SET	0x18
+#define ICTLR_CPU_IEP_FIR_CLR	0x1c
+
+#define ICTLR_CPU_IER		0x20
+#define ICTLR_CPU_IER_SET	0x24
+#define ICTLR_CPU_IER_CLR	0x28
+#define ICTLR_CPU_IEP_CLASS	0x2C
+
+#define ICTLR_COP_IER		0x30
+#define ICTLR_COP_IER_SET	0x34
+#define ICTLR_COP_IER_CLR	0x38
+#define ICTLR_COP_IEP_CLASS	0x3c
+
+#define TEGRA_MAX_NUM_ICTLRS	6
+
+static unsigned int num_ictlrs;
+
+struct tegra_ictlr_soc {
+	unsigned int num_ictlrs;
+};
+
+static const struct tegra_ictlr_soc tegra20_ictlr_soc = {
+	.num_ictlrs = 4,
+};
+
+static const struct tegra_ictlr_soc tegra30_ictlr_soc = {
+	.num_ictlrs = 5,
+};
+
+static const struct tegra_ictlr_soc tegra210_ictlr_soc = {
+	.num_ictlrs = 6,
+};
+
+static const struct of_device_id ictlr_matches[] = {
+	{ .compatible = "nvidia,tegra210-ictlr", .data = &tegra210_ictlr_soc },
+	{ .compatible = "nvidia,tegra30-ictlr", .data = &tegra30_ictlr_soc },
+	{ .compatible = "nvidia,tegra20-ictlr", .data = &tegra20_ictlr_soc },
+	{ }
+};
+
+struct tegra_ictlr_info {
+	void __iomem *base[TEGRA_MAX_NUM_ICTLRS];
+#ifdef CONFIG_PM_SLEEP
+	u32 cop_ier[TEGRA_MAX_NUM_ICTLRS];
+	u32 cop_iep[TEGRA_MAX_NUM_ICTLRS];
+	u32 cpu_ier[TEGRA_MAX_NUM_ICTLRS];
+	u32 cpu_iep[TEGRA_MAX_NUM_ICTLRS];
+
+	u32 ictlr_wake_mask[TEGRA_MAX_NUM_ICTLRS];
+#endif
+};
+
+static struct tegra_ictlr_info *lic;
+
+static inline void tegra_ictlr_write_mask(struct irq_data *d, unsigned long reg)
+{
+	void __iomem *base = d->chip_data;
+	u32 mask;
+
+	mask = BIT(d->hwirq % 32);
+	writel_relaxed(mask, base + reg);
+}
+
+static void tegra_mask(struct irq_data *d)
+{
+	tegra_ictlr_write_mask(d, ICTLR_CPU_IER_CLR);
+	irq_chip_mask_parent(d);
+}
+
+static void tegra_unmask(struct irq_data *d)
+{
+	tegra_ictlr_write_mask(d, ICTLR_CPU_IER_SET);
+	irq_chip_unmask_parent(d);
+}
+
+static void tegra_eoi(struct irq_data *d)
+{
+	tegra_ictlr_write_mask(d, ICTLR_CPU_IEP_FIR_CLR);
+	irq_chip_eoi_parent(d);
+}
+
+static int tegra_retrigger(struct irq_data *d)
+{
+	tegra_ictlr_write_mask(d, ICTLR_CPU_IEP_FIR_SET);
+	return irq_chip_retrigger_hierarchy(d);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_set_wake(struct irq_data *d, unsigned int enable)
+{
+	u32 irq = d->hwirq;
+	u32 index, mask;
+
+	index = (irq / 32);
+	mask = BIT(irq % 32);
+	if (enable)
+		lic->ictlr_wake_mask[index] |= mask;
+	else
+		lic->ictlr_wake_mask[index] &= ~mask;
+
+	/*
+	 * Do *not* call into the parent, as the GIC doesn't have any
+	 * wake-up facility...
+	 */
+	return 0;
+}
+
+static int tegra_ictlr_suspend(void)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	local_irq_save(flags);
+	for (i = 0; i < num_ictlrs; i++) {
+		void __iomem *ictlr = lic->base[i];
+
+		/* Save interrupt state */
+		lic->cpu_ier[i] = readl_relaxed(ictlr + ICTLR_CPU_IER);
+		lic->cpu_iep[i] = readl_relaxed(ictlr + ICTLR_CPU_IEP_CLASS);
+		lic->cop_ier[i] = readl_relaxed(ictlr + ICTLR_COP_IER);
+		lic->cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS);
+
+		/* Disable COP interrupts */
+		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
+
+		/* Disable CPU interrupts */
+		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
+
+		/* Enable the wakeup sources of ictlr */
+		writel_relaxed(lic->ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET);
+	}
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+static void tegra_ictlr_resume(void)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	local_irq_save(flags);
+	for (i = 0; i < num_ictlrs; i++) {
+		void __iomem *ictlr = lic->base[i];
+
+		writel_relaxed(lic->cpu_iep[i],
+			       ictlr + ICTLR_CPU_IEP_CLASS);
+		writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
+		writel_relaxed(lic->cpu_ier[i],
+			       ictlr + ICTLR_CPU_IER_SET);
+		writel_relaxed(lic->cop_iep[i],
+			       ictlr + ICTLR_COP_IEP_CLASS);
+		writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
+		writel_relaxed(lic->cop_ier[i],
+			       ictlr + ICTLR_COP_IER_SET);
+	}
+	local_irq_restore(flags);
+}
+
+static struct syscore_ops tegra_ictlr_syscore_ops = {
+	.suspend	= tegra_ictlr_suspend,
+	.resume		= tegra_ictlr_resume,
+};
+
+static void tegra_ictlr_syscore_init(void)
+{
+	register_syscore_ops(&tegra_ictlr_syscore_ops);
+}
+#else
+#define tegra_set_wake	NULL
+static inline void tegra_ictlr_syscore_init(void) {}
+#endif
+
+static struct irq_chip tegra_ictlr_chip = {
+	.name			= "LIC",
+	.irq_eoi		= tegra_eoi,
+	.irq_mask		= tegra_mask,
+	.irq_unmask		= tegra_unmask,
+	.irq_retrigger		= tegra_retrigger,
+	.irq_set_wake		= tegra_set_wake,
+	.flags			= IRQCHIP_MASK_ON_SUSPEND,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+#endif
+};
+
+static int tegra_ictlr_domain_xlate(struct irq_domain *domain,
+				    struct device_node *controller,
+				    const u32 *intspec,
+				    unsigned int intsize,
+				    unsigned long *out_hwirq,
+				    unsigned int *out_type)
+{
+	if (domain->of_node != controller)
+		return -EINVAL;	/* Shouldn't happen, really... */
+	if (intsize != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (intspec[0] != GIC_SPI)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2];
+	return 0;
+}
+
+static int tegra_ictlr_domain_alloc(struct irq_domain *domain,
+				    unsigned int virq,
+				    unsigned int nr_irqs, void *data)
+{
+	struct of_phandle_args *args = data;
+	struct of_phandle_args parent_args;
+	struct tegra_ictlr_info *info = domain->host_data;
+	irq_hw_number_t hwirq;
+	unsigned int i;
+
+	if (args->args_count != 3)
+		return -EINVAL;	/* Not GIC compliant */
+	if (args->args[0] != GIC_SPI)
+		return -EINVAL;	/* No PPI should point to this domain */
+
+	hwirq = args->args[1];
+	if (hwirq >= (num_ictlrs * 32))
+		return -EINVAL;
+
+	for (i = 0; i < nr_irqs; i++) {
+		int ictlr = (hwirq + i) / 32;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &tegra_ictlr_chip,
+					      &info->base[ictlr]);
+	}
+
+	parent_args = *args;
+	parent_args.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+}
+
+static void tegra_ictlr_domain_free(struct irq_domain *domain,
+				    unsigned int virq,
+				    unsigned int nr_irqs)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
+static const struct irq_domain_ops tegra_ictlr_domain_ops = {
+	.xlate	= tegra_ictlr_domain_xlate,
+	.alloc	= tegra_ictlr_domain_alloc,
+	.free	= tegra_ictlr_domain_free,
+};
+
+static int __init tegra_ictlr_init(struct device_node *node,
+				   struct device_node *parent)
+{
+	struct irq_domain *parent_domain, *domain;
+	const struct of_device_id *match;
+	const struct tegra_ictlr_soc *soc;
+	unsigned int i;
+	int err;
+
+	if (!parent) {
+		pr_err("%s: no parent, giving up\n", node->full_name);
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%s: unable to obtain parent domain\n", node->full_name);
+		return -ENXIO;
+	}
+
+	match = of_match_node(ictlr_matches, node);
+	if (!match)		/* Should never happen... */
+		return -ENODEV;
+
+	soc = match->data;
+
+	lic = kzalloc(sizeof(*lic), GFP_KERNEL);
+	if (!lic)
+		return -ENOMEM;
+
+	for (i = 0; i < TEGRA_MAX_NUM_ICTLRS; i++) {
+		void __iomem *base;
+
+		base = of_iomap(node, i);
+		if (!base)
+			break;
+
+		lic->base[i] = base;
+
+		/* Disable all interrupts */
+		writel_relaxed(~0UL, base + ICTLR_CPU_IER_CLR);
+		/* All interrupts target IRQ */
+		writel_relaxed(0, base + ICTLR_CPU_IEP_CLASS);
+
+		num_ictlrs++;
+	}
+
+	if (!num_ictlrs) {
+		pr_err("%s: no valid regions, giving up\n", node->full_name);
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	WARN(num_ictlrs != soc->num_ictlrs,
+	     "%s: Found %u interrupt controllers in DT; expected %u.\n",
+	     node->full_name, num_ictlrs, soc->num_ictlrs);
+
+
+	domain = irq_domain_add_hierarchy(parent_domain, 0, num_ictlrs * 32,
+					  node, &tegra_ictlr_domain_ops,
+					  lic);
+	if (!domain) {
+		pr_err("%s: failed to allocated domain\n", node->full_name);
+		err = -ENOMEM;
+		goto out_unmap;
+	}
+
+	tegra_ictlr_syscore_init();
+
+	pr_info("%s: %d interrupts forwarded to %s\n",
+		node->full_name, num_ictlrs * 32, parent->full_name);
+
+	return 0;
+
+out_unmap:
+	for (i = 0; i < num_ictlrs; i++)
+		iounmap(lic->base[i]);
+out_free:
+	kfree(lic);
+	return err;
+}
+
+IRQCHIP_DECLARE(tegra20_ictlr, "nvidia,tegra20-ictlr", tegra_ictlr_init);
+IRQCHIP_DECLARE(tegra30_ictlr, "nvidia,tegra30-ictlr", tegra_ictlr_init);
+IRQCHIP_DECLARE(tegra210_ictlr, "nvidia,tegra210-ictlr", tegra_ictlr_init);
diff --git a/drivers/irqchip/irq-vf610-mscm-ir.c b/drivers/irqchip/irq-vf610-mscm-ir.c
new file mode 100644
index 0000000..9521057
--- /dev/null
+++ b/drivers/irqchip/irq-vf610-mscm-ir.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2014-2015 Toradex AG
+ * Author: Stefan Agner <stefan@agner.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ * IRQ chip driver for MSCM interrupt router available on Vybrid SoC's.
+ * The interrupt router is between the CPU's interrupt controller and the
+ * peripheral. The router allows to route the peripheral interrupts to
+ * one of the two available CPU's on Vybrid VF6xx SoC's (Cortex-A5 or
+ * Cortex-M4). The router will be configured transparently on a IRQ
+ * request.
+ *
+ * o All peripheral interrupts of the Vybrid SoC can be routed to
+ *   CPU 0, CPU 1 or both. The routing is useful for dual-core
+ *   variants of Vybrid SoC such as VF6xx. This driver routes the
+ *   requested interrupt to the CPU currently running on.
+ *
+ * o It is required to setup the interrupt router even on single-core
+ *   variants of Vybrid.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/syscon.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/regmap.h>
+
+#include "irqchip.h"
+
+#define MSCM_CPxNUM		0x4
+
+#define MSCM_IRSPRC(n)		(0x80 + 2 * (n))
+#define MSCM_IRSPRC_CPEN_MASK	0x3
+
+#define MSCM_IRSPRC_NUM		112
+
+struct vf610_mscm_ir_chip_data {
+	void __iomem *mscm_ir_base;
+	u16 cpu_mask;
+	u16 saved_irsprc[MSCM_IRSPRC_NUM];
+};
+
+static struct vf610_mscm_ir_chip_data *mscm_ir_data;
+
+static inline void vf610_mscm_ir_save(struct vf610_mscm_ir_chip_data *data)
+{
+	int i;
+
+	for (i = 0; i < MSCM_IRSPRC_NUM; i++)
+		data->saved_irsprc[i] = readw_relaxed(data->mscm_ir_base + MSCM_IRSPRC(i));
+}
+
+static inline void vf610_mscm_ir_restore(struct vf610_mscm_ir_chip_data *data)
+{
+	int i;
+
+	for (i = 0; i < MSCM_IRSPRC_NUM; i++)
+		writew_relaxed(data->saved_irsprc[i], data->mscm_ir_base + MSCM_IRSPRC(i));
+}
+
+static int vf610_mscm_ir_notifier(struct notifier_block *self,
+				  unsigned long cmd, void *v)
+{
+	switch (cmd) {
+	case CPU_CLUSTER_PM_ENTER:
+		vf610_mscm_ir_save(mscm_ir_data);
+		break;
+	case CPU_CLUSTER_PM_ENTER_FAILED:
+	case CPU_CLUSTER_PM_EXIT:
+		vf610_mscm_ir_restore(mscm_ir_data);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mscm_ir_notifier_block = {
+	.notifier_call = vf610_mscm_ir_notifier,
+};
+
+static void vf610_mscm_ir_enable(struct irq_data *data)
+{
+	irq_hw_number_t hwirq = data->hwirq;
+	struct vf610_mscm_ir_chip_data *chip_data = data->chip_data;
+	u16 irsprc;
+
+	irsprc = readw_relaxed(chip_data->mscm_ir_base + MSCM_IRSPRC(hwirq));
+	irsprc &= MSCM_IRSPRC_CPEN_MASK;
+
+	WARN_ON(irsprc & ~chip_data->cpu_mask);
+
+	writew_relaxed(chip_data->cpu_mask,
+		       chip_data->mscm_ir_base + MSCM_IRSPRC(hwirq));
+
+	irq_chip_unmask_parent(data);
+}
+
+static void vf610_mscm_ir_disable(struct irq_data *data)
+{
+	irq_hw_number_t hwirq = data->hwirq;
+	struct vf610_mscm_ir_chip_data *chip_data = data->chip_data;
+
+	writew_relaxed(0x0, chip_data->mscm_ir_base + MSCM_IRSPRC(hwirq));
+
+	irq_chip_mask_parent(data);
+}
+
+static struct irq_chip vf610_mscm_ir_irq_chip = {
+	.name			= "mscm-ir",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_enable		= vf610_mscm_ir_enable,
+	.irq_disable		= vf610_mscm_ir_disable,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static int vf610_mscm_ir_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				      unsigned int nr_irqs, void *arg)
+{
+	int i;
+	irq_hw_number_t hwirq;
+	struct of_phandle_args *irq_data = arg;
+	struct of_phandle_args gic_data;
+
+	if (irq_data->args_count != 2)
+		return -EINVAL;
+
+	hwirq = irq_data->args[0];
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &vf610_mscm_ir_irq_chip,
+					      domain->host_data);
+
+	gic_data.np = domain->parent->of_node;
+	gic_data.args_count = 3;
+	gic_data.args[0] = GIC_SPI;
+	gic_data.args[1] = irq_data->args[0];
+	gic_data.args[2] = irq_data->args[1];
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &gic_data);
+}
+
+static const struct irq_domain_ops mscm_irq_domain_ops = {
+	.xlate = irq_domain_xlate_twocell,
+	.alloc = vf610_mscm_ir_domain_alloc,
+	.free = irq_domain_free_irqs_common,
+};
+
+static int __init vf610_mscm_ir_of_init(struct device_node *node,
+			       struct device_node *parent)
+{
+	struct irq_domain *domain, *domain_parent;
+	struct regmap *mscm_cp_regmap;
+	int ret, cpuid;
+
+	domain_parent = irq_find_host(parent);
+	if (!domain_parent) {
+		pr_err("vf610_mscm_ir: interrupt-parent not found\n");
+		return -EINVAL;
+	}
+
+	mscm_ir_data = kzalloc(sizeof(*mscm_ir_data), GFP_KERNEL);
+	if (!mscm_ir_data)
+		return -ENOMEM;
+
+	mscm_ir_data->mscm_ir_base = of_io_request_and_map(node, 0, "mscm-ir");
+
+	if (!mscm_ir_data->mscm_ir_base) {
+		pr_err("vf610_mscm_ir: unable to map mscm register\n");
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	mscm_cp_regmap = syscon_regmap_lookup_by_phandle(node, "fsl,cpucfg");
+	if (IS_ERR(mscm_cp_regmap)) {
+		ret = PTR_ERR(mscm_cp_regmap);
+		pr_err("vf610_mscm_ir: regmap lookup for cpucfg failed\n");
+		goto out_unmap;
+	}
+
+	regmap_read(mscm_cp_regmap, MSCM_CPxNUM, &cpuid);
+	mscm_ir_data->cpu_mask = 0x1 << cpuid;
+
+	domain = irq_domain_add_hierarchy(domain_parent, 0,
+					  MSCM_IRSPRC_NUM, node,
+					  &mscm_irq_domain_ops, mscm_ir_data);
+	if (!domain) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	cpu_pm_register_notifier(&mscm_ir_notifier_block);
+
+	return 0;
+
+out_unmap:
+	iounmap(mscm_ir_data->mscm_ir_base);
+out_free:
+	kfree(mscm_ir_data);
+	return ret;
+}
+IRQCHIP_DECLARE(vf610_mscm_ir, "fsl,vf610-mscm-ir", vf610_mscm_ir_of_init);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 717daad..e617878 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -249,6 +249,7 @@
 	const int rw = bio_data_dir(bio);
 	struct mddev *mddev = q->queuedata;
 	unsigned int sectors;
+	int cpu;
 
 	if (mddev == NULL || mddev->pers == NULL
 	    || !mddev->ready) {
@@ -284,7 +285,10 @@
 	sectors = bio_sectors(bio);
 	mddev->pers->make_request(mddev, bio);
 
-	generic_start_io_acct(rw, sectors, &mddev->gendisk->part0);
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+	part_stat_unlock();
 
 	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
 		wake_up(&mddev->sb_wait);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 3ed9f42..3b5d7f7 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -313,7 +313,7 @@
 
 /*
  * remaps the bio to the target device. we separate two flows.
- * power 2 flow and a general flow for the sake of perfromance
+ * power 2 flow and a general flow for the sake of performance
 */
 static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
 				sector_t sector, sector_t *sector_offset)
@@ -524,6 +524,7 @@
 			split = bio;
 		}
 
+		sector = bio->bi_iter.bi_sector;
 		zone = find_zone(mddev->private, &sector);
 		tmp_dev = map_sector(mddev, zone, sector, &sector);
 		split->bi_bdev = tmp_dev->bdev;
diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c
index 5d2d8f4..67faa8d 100644
--- a/drivers/media/dvb-frontends/rtl2832.c
+++ b/drivers/media/dvb-frontends/rtl2832.c
@@ -1240,7 +1240,7 @@
 	dev->regmap_config.max_register = 5 * 0x100,
 	dev->regmap_config.ranges = regmap_range_cfg,
 	dev->regmap_config.num_ranges = ARRAY_SIZE(regmap_range_cfg),
-	dev->regmap_config.cache_type = REGCACHE_RBTREE,
+	dev->regmap_config.cache_type = REGCACHE_NONE,
 	dev->regmap = regmap_init(&client->dev, &regmap_bus, client,
 				  &dev->regmap_config);
 	if (IS_ERR(dev->regmap)) {
diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c
index e4901a5..63c0ee5 100644
--- a/drivers/media/pci/cx23885/cx23885-417.c
+++ b/drivers/media/pci/cx23885/cx23885-417.c
@@ -1339,14 +1339,13 @@
 	strlcpy(cap->driver, dev->name, sizeof(cap->driver));
 	strlcpy(cap->card, cx23885_boards[tsport->dev->board].name,
 		sizeof(cap->card));
-	sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci));
-	cap->capabilities =
-		V4L2_CAP_VIDEO_CAPTURE |
-		V4L2_CAP_READWRITE     |
-		V4L2_CAP_STREAMING     |
-		0;
+	sprintf(cap->bus_info, "PCIe:%s", pci_name(dev->pci));
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+			   V4L2_CAP_STREAMING;
 	if (dev->tuner_type != TUNER_ABSENT)
-		cap->capabilities |= V4L2_CAP_TUNER;
+		cap->device_caps |= V4L2_CAP_TUNER;
+	cap->capabilities = cap->device_caps | V4L2_CAP_VBI_CAPTURE |
+		V4L2_CAP_AUDIO | V4L2_CAP_DEVICE_CAPS;
 
 	return 0;
 }
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c
index 12f7452..a92ff42 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c
@@ -1845,6 +1845,9 @@
 	struct s5p_jpeg_addr jpeg_addr;
 	u32 pix_size, padding_bytes = 0;
 
+	jpeg_addr.cb = 0;
+	jpeg_addr.cr = 0;
+
 	pix_size = ctx->cap_q.w * ctx->cap_q.h;
 
 	if (ctx->mode == S5P_JPEG_ENCODE) {
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c b/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c
index e8c2cad..0974b9a 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c
@@ -20,7 +20,7 @@
 
 void exynos3250_jpeg_reset(void __iomem *regs)
 {
-	u32 reg = 0;
+	u32 reg = 1;
 	int count = 1000;
 
 	writel(1, regs + EXYNOS3250_SW_RESET);
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c
index 8e44a59..98374e8 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c
@@ -833,6 +833,7 @@
 	q->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
 	q->io_modes = VB2_MMAP;
 	q->drv_priv = &ctx->fh;
+	q->lock = &dev->mfc_mutex;
 	if (vdev == dev->vfd_dec) {
 		q->io_modes = VB2_MMAP;
 		q->ops = get_dec_queue_ops();
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
index 15f7663..24262bb 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
@@ -29,7 +29,7 @@
 
 /* Offset base used to differentiate between CAPTURE and OUTPUT
 *  while mmaping */
-#define DST_QUEUE_OFF_BASE      (TASK_SIZE / 2)
+#define DST_QUEUE_OFF_BASE	(1 << 30)
 
 #define MFC_BANK1_ALLOC_CTX	0
 #define MFC_BANK2_ALLOC_CTX	1
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h b/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h
index de2b8c6..22dfb3e 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h
@@ -302,7 +302,7 @@
 	void (*write_info)(struct s5p_mfc_ctx *ctx, unsigned int data,
 			unsigned int ofs);
 	unsigned int (*read_info)(struct s5p_mfc_ctx *ctx,
-			unsigned int ofs);
+			unsigned long ofs);
 	int (*get_dspl_y_adr)(struct s5p_mfc_dev *dev);
 	int (*get_dec_y_adr)(struct s5p_mfc_dev *dev);
 	int (*get_dspl_status)(struct s5p_mfc_dev *dev);
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c
index 0c4fcf2..b09bcd1 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c
@@ -263,15 +263,15 @@
 static void s5p_mfc_write_info_v5(struct s5p_mfc_ctx *ctx, unsigned int data,
 			unsigned int ofs)
 {
-	writel(data, (volatile void __iomem *)(ctx->shm.virt + ofs));
+	writel(data, (void *)(ctx->shm.virt + ofs));
 	wmb();
 }
 
 static unsigned int s5p_mfc_read_info_v5(struct s5p_mfc_ctx *ctx,
-				unsigned int ofs)
+				unsigned long ofs)
 {
 	rmb();
-	return readl((volatile void __iomem *)(ctx->shm.virt + ofs));
+	return readl((void *)(ctx->shm.virt + ofs));
 }
 
 static void s5p_mfc_dec_calc_dpb_size_v5(struct s5p_mfc_ctx *ctx)
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
index d826c58..cefad18 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
@@ -1852,17 +1852,17 @@
 		unsigned int ofs)
 {
 	s5p_mfc_clock_on();
-	writel(data, (volatile void __iomem *)((unsigned long)ofs));
+	writel(data, (void *)((unsigned long)ofs));
 	s5p_mfc_clock_off();
 }
 
 static unsigned int
-s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned int ofs)
+s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned long ofs)
 {
 	int ret;
 
 	s5p_mfc_clock_on();
-	ret = readl((volatile void __iomem *)((unsigned long)ofs));
+	ret = readl((void *)ofs);
 	s5p_mfc_clock_off();
 
 	return ret;
diff --git a/drivers/media/platform/s5p-tv/Kconfig b/drivers/media/platform/s5p-tv/Kconfig
index 5a1835d..697aaed 100644
--- a/drivers/media/platform/s5p-tv/Kconfig
+++ b/drivers/media/platform/s5p-tv/Kconfig
@@ -20,6 +20,7 @@
 config VIDEO_SAMSUNG_S5P_HDMI
 	tristate "Samsung HDMI Driver"
 	depends on VIDEO_V4L2
+	depends on I2C
 	depends on VIDEO_SAMSUNG_S5P_TV
 	select VIDEO_SAMSUNG_S5P_HDMIPHY
 	help
diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c
index a901b62..2554f37 100644
--- a/drivers/media/platform/sh_veu.c
+++ b/drivers/media/platform/sh_veu.c
@@ -1158,6 +1158,7 @@
 	}
 
 	*vdev = sh_veu_videodev;
+	vdev->v4l2_dev = &veu->v4l2_dev;
 	spin_lock_init(&veu->lock);
 	mutex_init(&veu->fop_lock);
 	vdev->lock = &veu->fop_lock;
diff --git a/drivers/media/platform/soc_camera/atmel-isi.c b/drivers/media/platform/soc_camera/atmel-isi.c
index 8526bf5..c835beb 100644
--- a/drivers/media/platform/soc_camera/atmel-isi.c
+++ b/drivers/media/platform/soc_camera/atmel-isi.c
@@ -843,6 +843,8 @@
 	if (isi->pdata.full_mode)
 		cfg1 |= ISI_CFG1_FULL_MODE;
 
+	cfg1 |= ISI_CFG1_THMASK_BEATS_16;
+
 	isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS);
 	isi_writel(isi, ISI_CFG1, cfg1);
 
diff --git a/drivers/media/platform/soc_camera/soc_camera.c b/drivers/media/platform/soc_camera/soc_camera.c
index cee7b56..66634b4 100644
--- a/drivers/media/platform/soc_camera/soc_camera.c
+++ b/drivers/media/platform/soc_camera/soc_camera.c
@@ -1665,7 +1665,7 @@
 eaddpdev:
 	platform_device_put(sasc->pdev);
 eallocpdev:
-	devm_kfree(ici->v4l2_dev.dev, sasc);
+	devm_kfree(ici->v4l2_dev.dev, info);
 	dev_err(ici->v4l2_dev.dev, "group probe failed: %d\n", ret);
 
 	return ret;
diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 77dcfdf..87fc0fe 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -780,8 +780,6 @@
 		case TUNER_RTL2832_TUA9001:
 			return rtl2832u_tua9001_tuner_callback(d, cmd, arg);
 		}
-	default:
-		return -EINVAL;
 	}
 
 	return 0;
diff --git a/drivers/media/usb/gspca/Kconfig b/drivers/media/usb/gspca/Kconfig
index 60af3b1..3fd94fe 100644
--- a/drivers/media/usb/gspca/Kconfig
+++ b/drivers/media/usb/gspca/Kconfig
@@ -1,6 +1,7 @@
 menuconfig USB_GSPCA
 	tristate "GSPCA based webcams"
 	depends on VIDEO_V4L2
+	depends on INPUT || INPUT=n
 	default m
 	---help---
 	  Say Y here if you want to enable selecting webcams based
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index bc08a82..cc16e76 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -3230,18 +3230,13 @@
 
 	if (threadio == NULL)
 		return 0;
-	call_void_qop(q, wait_finish, q);
 	threadio->stop = true;
-	vb2_internal_streamoff(q, q->type);
-	call_void_qop(q, wait_prepare, q);
+	/* Wake up all pending sleeps in the thread */
+	vb2_queue_error(q);
 	err = kthread_stop(threadio->thread);
-	q->fileio = NULL;
-	fileio->req.count = 0;
-	vb2_reqbufs(q, &fileio->req);
-	kfree(fileio);
+	__vb2_cleanup_fileio(q);
 	threadio->thread = NULL;
 	kfree(threadio);
-	q->fileio = NULL;
 	q->threadio = NULL;
 	return err;
 }
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index b481d20..69e0483 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -632,8 +632,7 @@
 	}
 
 	/* extract page list from userspace mapping */
-	ret = vb2_dc_get_user_pages(start, pages, n_pages, vma,
-				    dma_dir == DMA_FROM_DEVICE);
+	ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir);
 	if (ret) {
 		unsigned long pfn;
 		if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) {
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index cfff0b6..0d18256 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -49,7 +49,9 @@
 		if (!psy)
 			continue;
 
-		ret = psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);
+		ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_ONLINE,
+				&val);
+		power_supply_put(psy);
 
 		if (!ret && val.intval) {
 			charger_present = true;
@@ -63,8 +65,8 @@
 	/* Check if battery is known */
 	psy = power_supply_get_by_name("ab8500_btemp");
 	if (psy) {
-		ret = psy->get_property(psy, POWER_SUPPLY_PROP_TECHNOLOGY,
-					&val);
+		ret = power_supply_get_property(psy,
+				POWER_SUPPLY_PROP_TECHNOLOGY, &val);
 		if (!ret && val.intval != POWER_SUPPLY_TECHNOLOGY_UNKNOWN) {
 			printk(KERN_INFO
 			       "Charger \"%s\" is connected with known battery."
@@ -72,6 +74,7 @@
 			       pss[i]);
 			machine_restart("charging");
 		}
+		power_supply_put(psy);
 	}
 
 shutdown:
diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index b1b580a..0acbe52 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -87,7 +87,7 @@
 	},
 };
 
-static struct resource axp288_battery_resources[] = {
+static struct resource axp288_fuel_gauge_resources[] = {
 	{
 		.start = AXP288_IRQ_QWBTU,
 		.end   = AXP288_IRQ_QWBTU,
@@ -350,9 +350,9 @@
 		.resources = axp288_charger_resources,
 	},
 	{
-		.name = "axp288_battery",
-		.num_resources = ARRAY_SIZE(axp288_battery_resources),
-		.resources = axp288_battery_resources,
+		.name = "axp288_fuel_gauge",
+		.num_resources = ARRAY_SIZE(axp288_fuel_gauge_resources),
+		.resources = axp288_fuel_gauge_resources,
 	},
 	{
 		.name = "axp288_pmic_acpi",
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 38552a3..65fed71 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -202,16 +202,17 @@
 {
 	char name[ENCLOSURE_NAME_SIZE];
 
+	enclosure_link_name(cdev, name);
+
 	/*
 	 * In odd circumstances, like multipath devices, something else may
 	 * already have removed the links, so check for this condition first.
 	 */
-	if (!cdev->dev->kobj.sd)
-		return;
+	if (cdev->dev->kobj.sd)
+		sysfs_remove_link(&cdev->dev->kobj, name);
 
-	enclosure_link_name(cdev, name);
-	sysfs_remove_link(&cdev->dev->kobj, name);
-	sysfs_remove_link(&cdev->cdev.kobj, "device");
+	if (cdev->cdev.kobj.sd)
+		sysfs_remove_link(&cdev->cdev.kobj, "device");
 }
 
 static int enclosure_add_links(struct enclosure_component *cdev)
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 82dc5748..7f32712 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -1210,7 +1210,7 @@
 
 		if (((die_args->trapnr == X86_TRAP_MF) ||
 		     (die_args->trapnr == X86_TRAP_XF)) &&
-		    !user_mode_vm(die_args->regs))
+		    !user_mode(die_args->regs))
 			xpc_die_deactivate();
 
 		break;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 23f10f7..c296bc0 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -897,6 +897,7 @@
 	DECLARE_WAITQUEUE(wait, current);
 	unsigned long flags;
 	int stop;
+	bool pm = false;
 
 	might_sleep();
 
@@ -916,15 +917,18 @@
 		host->claimed = 1;
 		host->claimer = current;
 		host->claim_cnt += 1;
+		if (host->claim_cnt == 1)
+			pm = true;
 	} else
 		wake_up(&host->wq);
 	spin_unlock_irqrestore(&host->lock, flags);
 	remove_wait_queue(&host->wq, &wait);
-	if (host->ops->enable && !stop && host->claim_cnt == 1)
-		host->ops->enable(host);
+
+	if (pm)
+		pm_runtime_get_sync(mmc_dev(host));
+
 	return stop;
 }
-
 EXPORT_SYMBOL(__mmc_claim_host);
 
 /**
@@ -940,9 +944,6 @@
 
 	WARN_ON(!host->claimed);
 
-	if (host->ops->disable && host->claim_cnt == 1)
-		host->ops->disable(host);
-
 	spin_lock_irqsave(&host->lock, flags);
 	if (--host->claim_cnt) {
 		/* Release for nested claim */
@@ -952,6 +953,8 @@
 		host->claimer = NULL;
 		spin_unlock_irqrestore(&host->lock, flags);
 		wake_up(&host->wq);
+		pm_runtime_mark_last_busy(mmc_dev(host));
+		pm_runtime_put_autosuspend(mmc_dev(host));
 	}
 }
 EXPORT_SYMBOL(mmc_release_host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1d41e85..c84131e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_runtime.h>
@@ -336,6 +337,8 @@
 {
 	int err = 0, idx;
 	unsigned int part_size;
+	struct device_node *np;
+	bool broken_hpi = false;
 
 	/* Version is coded in the CSD_STRUCTURE byte in the EXT_CSD register */
 	card->ext_csd.raw_ext_csd_structure = ext_csd[EXT_CSD_STRUCTURE];
@@ -349,6 +352,11 @@
 		}
 	}
 
+	np = mmc_of_find_child_device(card->host, 0);
+	if (np && of_device_is_compatible(np, "mmc-card"))
+		broken_hpi = of_property_read_bool(np, "broken-hpi");
+	of_node_put(np);
+
 	/*
 	 * The EXT_CSD format is meant to be forward compatible. As long
 	 * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
@@ -494,7 +502,7 @@
 		}
 
 		/* check whether the eMMC card supports HPI */
-		if (ext_csd[EXT_CSD_HPI_FEATURES] & 0x1) {
+		if (!broken_hpi && (ext_csd[EXT_CSD_HPI_FEATURES] & 0x1)) {
 			card->ext_csd.hpi = 1;
 			if (ext_csd[EXT_CSD_HPI_FEATURES] & 0x2)
 				card->ext_csd.hpi_cmd =	MMC_STOP_TRANSMISSION;
diff --git a/drivers/mmc/core/pwrseq.c b/drivers/mmc/core/pwrseq.c
index 8623561..ab21297 100644
--- a/drivers/mmc/core/pwrseq.c
+++ b/drivers/mmc/core/pwrseq.c
@@ -19,7 +19,7 @@
 
 struct mmc_pwrseq_match {
 	const char *compatible;
-	int (*alloc)(struct mmc_host *host, struct device *dev);
+	struct mmc_pwrseq *(*alloc)(struct mmc_host *host, struct device *dev);
 };
 
 static struct mmc_pwrseq_match pwrseq_match[] = {
@@ -52,6 +52,7 @@
 	struct platform_device *pdev;
 	struct device_node *np;
 	struct mmc_pwrseq_match *match;
+	struct mmc_pwrseq *pwrseq;
 	int ret = 0;
 
 	np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0);
@@ -70,9 +71,14 @@
 		goto err;
 	}
 
-	ret = match->alloc(host, &pdev->dev);
-	if (!ret)
-		dev_info(host->parent, "allocated mmc-pwrseq\n");
+	pwrseq = match->alloc(host, &pdev->dev);
+	if (IS_ERR(pwrseq)) {
+		ret = PTR_ERR(host->pwrseq);
+		goto err;
+	}
+
+	host->pwrseq = pwrseq;
+	dev_info(host->parent, "allocated mmc-pwrseq\n");
 
 err:
 	of_node_put(np);
@@ -109,4 +115,6 @@
 
 	if (pwrseq && pwrseq->ops && pwrseq->ops->free)
 		pwrseq->ops->free(host);
+
+	host->pwrseq = NULL;
 }
diff --git a/drivers/mmc/core/pwrseq.h b/drivers/mmc/core/pwrseq.h
index aba3409..096da48 100644
--- a/drivers/mmc/core/pwrseq.h
+++ b/drivers/mmc/core/pwrseq.h
@@ -27,8 +27,10 @@
 void mmc_pwrseq_power_off(struct mmc_host *host);
 void mmc_pwrseq_free(struct mmc_host *host);
 
-int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev);
-int mmc_pwrseq_emmc_alloc(struct mmc_host *host, struct device *dev);
+struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
+					   struct device *dev);
+struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
+					 struct device *dev);
 
 #else
 
diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c
index a2d5459..9d6d2fb 100644
--- a/drivers/mmc/core/pwrseq_emmc.c
+++ b/drivers/mmc/core/pwrseq_emmc.c
@@ -49,7 +49,6 @@
 	unregister_restart_handler(&pwrseq->reset_nb);
 	gpiod_put(pwrseq->reset_gpio);
 	kfree(pwrseq);
-	host->pwrseq = NULL;
 }
 
 static struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
@@ -67,14 +66,15 @@
 	return NOTIFY_DONE;
 }
 
-int mmc_pwrseq_emmc_alloc(struct mmc_host *host, struct device *dev)
+struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
+					 struct device *dev)
 {
 	struct mmc_pwrseq_emmc *pwrseq;
 	int ret = 0;
 
 	pwrseq = kzalloc(sizeof(struct mmc_pwrseq_emmc), GFP_KERNEL);
 	if (!pwrseq)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	pwrseq->reset_gpio = gpiod_get_index(dev, "reset", 0, GPIOD_OUT_LOW);
 	if (IS_ERR(pwrseq->reset_gpio)) {
@@ -92,10 +92,9 @@
 	register_restart_handler(&pwrseq->reset_nb);
 
 	pwrseq->pwrseq.ops = &mmc_pwrseq_emmc_ops;
-	host->pwrseq = &pwrseq->pwrseq;
 
-	return 0;
+	return &pwrseq->pwrseq;
 free:
 	kfree(pwrseq);
-	return ret;
+	return ERR_PTR(ret);
 }
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index c53f14a..0b14b83 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -85,7 +85,6 @@
 		clk_put(pwrseq->ext_clk);
 
 	kfree(pwrseq);
-	host->pwrseq = NULL;
 }
 
 static struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
@@ -95,7 +94,8 @@
 	.free = mmc_pwrseq_simple_free,
 };
 
-int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev)
+struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
+					   struct device *dev)
 {
 	struct mmc_pwrseq_simple *pwrseq;
 	int i, nr_gpios, ret = 0;
@@ -107,7 +107,7 @@
 	pwrseq = kzalloc(sizeof(struct mmc_pwrseq_simple) + nr_gpios *
 			 sizeof(struct gpio_desc *), GFP_KERNEL);
 	if (!pwrseq)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	pwrseq->ext_clk = clk_get(dev, "ext_clock");
 	if (IS_ERR(pwrseq->ext_clk) &&
@@ -133,13 +133,12 @@
 
 	pwrseq->nr_gpios = nr_gpios;
 	pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
-	host->pwrseq = &pwrseq->pwrseq;
 
-	return 0;
+	return &pwrseq->pwrseq;
 clk_put:
 	if (!IS_ERR(pwrseq->ext_clk))
 		clk_put(pwrseq->ext_clk);
 free:
 	kfree(pwrseq);
-	return ret;
+	return ERR_PTR(ret);
 }
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index ce6cc47..5bc6c7d 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -293,19 +293,22 @@
 	int err;
 
 	if (card->type == MMC_TYPE_SDIO)
-		return sdio_enable_wide(card);
-
-	if ((card->host->caps & MMC_CAP_4_BIT_DATA) &&
-		(card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
+		err = sdio_enable_wide(card);
+	else if ((card->host->caps & MMC_CAP_4_BIT_DATA) &&
+		 (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
 		err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4);
 		if (err)
 			return err;
+		err = sdio_enable_wide(card);
+		if (err <= 0)
+			mmc_app_set_bus_width(card, MMC_BUS_WIDTH_1);
 	} else
 		return 0;
 
-	err = sdio_enable_wide(card);
-	if (err <= 0)
-		mmc_app_set_bus_width(card, MMC_BUS_WIDTH_1);
+	if (err > 0) {
+		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		err = 0;
+	}
 
 	return err;
 }
@@ -547,13 +550,8 @@
 	/*
 	 * Switch to wider bus (if supported).
 	 */
-	if (card->host->caps & MMC_CAP_4_BIT_DATA) {
+	if (card->host->caps & MMC_CAP_4_BIT_DATA)
 		err = sdio_enable_4bit_bus(card);
-		if (err > 0) {
-			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-			err = 0;
-		}
-	}
 
 	/* Set the driver strength for the card */
 	sdio_select_driver_type(card);
@@ -803,9 +801,7 @@
 		 * Switch to wider bus (if supported).
 		 */
 		err = sdio_enable_4bit_bus(card);
-		if (err > 0)
-			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-		else if (err)
+		if (err)
 			goto remove;
 	}
 finish:
@@ -983,10 +979,6 @@
 	} else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		/* We may have switched to 1-bit mode during suspend */
 		err = sdio_enable_4bit_bus(host->card);
-		if (err > 0) {
-			mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
-			err = 0;
-		}
 	}
 
 	if (!err && host->sdio_irqs) {
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 61ac63a..7f4db90 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -132,7 +132,7 @@
 config MMC_SDHCI_OF_ESDHC
 	tristate "SDHCI OF support for the Freescale eSDHC controller"
 	depends on MMC_SDHCI_PLTFM
-	depends on PPC_OF
+	depends on PPC
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Freescale eSDHC controller support.
@@ -144,7 +144,7 @@
 config MMC_SDHCI_OF_HLWD
 	tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers"
 	depends on MMC_SDHCI_PLTFM
-	depends on PPC_OF
+	depends on PPC
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -230,7 +230,7 @@
 	tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
 	depends on CLKDEV_LOOKUP
 	depends on MMC_SDHCI_PLTFM
-	depends on ARCH_MMP || COMPILE_TEST
+	depends on ARCH_BERLIN || ARCH_MMP || ARCH_MVEBU || COMPILE_TEST
 	default CPU_MMP2
 	help
 	  This selects the Marvell(R) PXAV3 SD Host Controller.
@@ -255,6 +255,7 @@
 config MMC_SDHCI_SPEAR
 	tristate "SDHCI support on ST SPEAr platform"
 	depends on MMC_SDHCI && PLAT_SPEAR
+	depends on OF
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
 	  often referrered to as the HSMMC block in some of the ST SPEAR range
@@ -307,6 +308,20 @@
 
 	  If unsure, say N.
 
+config MMC_SDHCI_IPROC
+	tristate "SDHCI platform support for the iProc SD/MMC Controller"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	depends on MMC_SDHCI_PLTFM
+	default ARCH_BCM_IPROC
+	select MMC_SDHCI_IO_ACCESSORS
+	help
+	  This selects the iProc SD/MMC controller.
+
+	  If you have an IPROC platform with SD or MMC devices,
+	  say Y or M here.
+
+	  If unsure, say N.
+
 config MMC_MOXART
 	tristate "MOXART SD/MMC Host Controller support"
 	depends on ARCH_MOXART && MMC
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6a7cfe0..711e913 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -71,6 +71,7 @@
 obj-$(CONFIG_MMC_SDHCI_OF_HLWD)		+= sdhci-of-hlwd.o
 obj-$(CONFIG_MMC_SDHCI_BCM_KONA)	+= sdhci-bcm-kona.o
 obj-$(CONFIG_MMC_SDHCI_BCM2835)		+= sdhci-bcm2835.o
+obj-$(CONFIG_MMC_SDHCI_IPROC)		+= sdhci-iproc.o
 obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
 obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
 
diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
index c97001e..0aa44e6 100644
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -135,10 +135,17 @@
 #define ATMCI_REGS_SIZE		0x100
 
 /* Register access macros */
-#define atmci_readl(port,reg)				\
+#ifdef CONFIG_AVR32
+#define atmci_readl(port, reg)			\
 	__raw_readl((port)->regs + reg)
-#define atmci_writel(port,reg,value)			\
+#define atmci_writel(port, reg, value)			\
 	__raw_writel((value), (port)->regs + reg)
+#else
+#define atmci_readl(port, reg)			\
+	readl_relaxed((port)->regs + reg)
+#define atmci_writel(port, reg, value)			\
+	writel_relaxed((value), (port)->regs + reg)
+#endif
 
 /* On AVR chips the Peripheral DMA Controller is not connected to MCI. */
 #ifdef CONFIG_AVR32
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index fe32948..e761eb1 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -40,7 +40,12 @@
 	u8				ciu_div;
 	u32				sdr_timing;
 	u32				ddr_timing;
+	u32				hs400_timing;
+	u32				tuned_sample;
 	u32				cur_speed;
+	u32				dqs_delay;
+	u32				saved_dqs_en;
+	u32				saved_strobe_ctrl;
 };
 
 static struct dw_mci_exynos_compatible {
@@ -71,6 +76,21 @@
 	},
 };
 
+static inline u8 dw_mci_exynos_get_ciu_div(struct dw_mci *host)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+
+	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS4412)
+		return EXYNOS4412_FIXED_CIU_CLK_DIV;
+	else if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS4210)
+		return EXYNOS4210_FIXED_CIU_CLK_DIV;
+	else if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+			priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+		return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL64)) + 1;
+	else
+		return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL)) + 1;
+}
+
 static int dw_mci_exynos_priv_init(struct dw_mci *host)
 {
 	struct dw_mci_exynos_priv_data *priv = host->priv;
@@ -85,6 +105,16 @@
 			   SDMMC_MPSCTRL_NON_SECURE_WRITE_BIT);
 	}
 
+	if (priv->ctrl_type >= DW_MCI_TYPE_EXYNOS5420) {
+		priv->saved_strobe_ctrl = mci_readl(host, HS400_DLINE_CTRL);
+		priv->saved_dqs_en = mci_readl(host, HS400_DQS_EN);
+		priv->saved_dqs_en |= AXI_NON_BLOCKING_WR;
+		mci_writel(host, HS400_DQS_EN, priv->saved_dqs_en);
+		if (!priv->dqs_delay)
+			priv->dqs_delay =
+				DQS_CTRL_GET_RD_DELAY(priv->saved_strobe_ctrl);
+	}
+
 	return 0;
 }
 
@@ -97,6 +127,26 @@
 	return 0;
 }
 
+static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+	u32 clksel;
+
+	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+		clksel = mci_readl(host, CLKSEL64);
+	else
+		clksel = mci_readl(host, CLKSEL);
+
+	clksel = (clksel & ~SDMMC_CLKSEL_TIMING_MASK) | timing;
+
+	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+		mci_writel(host, CLKSEL64, clksel);
+	else
+		mci_writel(host, CLKSEL, clksel);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int dw_mci_exynos_suspend(struct device *dev)
 {
@@ -172,30 +222,38 @@
 	}
 }
 
-static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
+static void dw_mci_exynos_config_hs400(struct dw_mci *host, u32 timing)
 {
 	struct dw_mci_exynos_priv_data *priv = host->priv;
-	unsigned int wanted = ios->clock;
-	unsigned long actual;
-	u8 div = priv->ciu_div + 1;
+	u32 dqs, strobe;
 
-	if (ios->timing == MMC_TIMING_MMC_DDR52) {
-		if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
-			priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
-			mci_writel(host, CLKSEL64, priv->ddr_timing);
-		else
-			mci_writel(host, CLKSEL, priv->ddr_timing);
-		/* Should be double rate for DDR mode */
-		if (ios->bus_width == MMC_BUS_WIDTH_8)
-			wanted <<= 1;
+	/*
+	 * Not supported to configure register
+	 * related to HS400
+	 */
+	if (priv->ctrl_type < DW_MCI_TYPE_EXYNOS5420)
+		return;
+
+	dqs = priv->saved_dqs_en;
+	strobe = priv->saved_strobe_ctrl;
+
+	if (timing == MMC_TIMING_MMC_HS400) {
+		dqs |= DATA_STROBE_EN;
+		strobe = DQS_CTRL_RD_DELAY(strobe, priv->dqs_delay);
 	} else {
-		if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
-			priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
-			mci_writel(host, CLKSEL64, priv->sdr_timing);
-		else
-			mci_writel(host, CLKSEL, priv->sdr_timing);
+		dqs &= ~DATA_STROBE_EN;
 	}
 
+	mci_writel(host, HS400_DQS_EN, dqs);
+	mci_writel(host, HS400_DLINE_CTRL, strobe);
+}
+
+static void dw_mci_exynos_adjust_clock(struct dw_mci *host, unsigned int wanted)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+	unsigned long actual;
+	u8 div;
+	int ret;
 	/*
 	 * Don't care if wanted clock is zero or
 	 * ciu clock is unavailable
@@ -207,17 +265,52 @@
 	if (wanted < EXYNOS_CCLKIN_MIN)
 		wanted = EXYNOS_CCLKIN_MIN;
 
-	if (wanted != priv->cur_speed) {
-		int ret = clk_set_rate(host->ciu_clk, wanted * div);
-		if (ret)
-			dev_warn(host->dev,
-				"failed to set clk-rate %u error: %d\n",
-				 wanted * div, ret);
-		actual = clk_get_rate(host->ciu_clk);
-		host->bus_hz = actual / div;
-		priv->cur_speed = wanted;
-		host->current_speed = 0;
+	if (wanted == priv->cur_speed)
+		return;
+
+	div = dw_mci_exynos_get_ciu_div(host);
+	ret = clk_set_rate(host->ciu_clk, wanted * div);
+	if (ret)
+		dev_warn(host->dev,
+			"failed to set clk-rate %u error: %d\n",
+			wanted * div, ret);
+	actual = clk_get_rate(host->ciu_clk);
+	host->bus_hz = actual / div;
+	priv->cur_speed = wanted;
+	host->current_speed = 0;
+}
+
+static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+	unsigned int wanted = ios->clock;
+	u32 timing = ios->timing, clksel;
+
+	switch (timing) {
+	case MMC_TIMING_MMC_HS400:
+		/* Update tuned sample timing */
+		clksel = SDMMC_CLKSEL_UP_SAMPLE(
+				priv->hs400_timing, priv->tuned_sample);
+		wanted <<= 1;
+		break;
+	case MMC_TIMING_MMC_DDR52:
+		clksel = priv->ddr_timing;
+		/* Should be double rate for DDR mode */
+		if (ios->bus_width == MMC_BUS_WIDTH_8)
+			wanted <<= 1;
+		break;
+	default:
+		clksel = priv->sdr_timing;
 	}
+
+	/* Set clock timing for the requested speed mode*/
+	dw_mci_exynos_set_clksel_timing(host, clksel);
+
+	/* Configure setting for HS400 */
+	dw_mci_exynos_config_hs400(host, timing);
+
+	/* Configure clock rate */
+	dw_mci_exynos_adjust_clock(host, wanted);
 }
 
 static int dw_mci_exynos_parse_dt(struct dw_mci *host)
@@ -260,6 +353,16 @@
 		return ret;
 
 	priv->ddr_timing = SDMMC_CLKSEL_TIMING(timing[0], timing[1], div);
+
+	ret = of_property_read_u32_array(np,
+			"samsung,dw-mshc-hs400-timing", timing, 2);
+	if (!ret && of_property_read_u32(np,
+				"samsung,read-strobe-delay", &priv->dqs_delay))
+		dev_dbg(host->dev,
+			"read-strobe-delay is not found, assuming usage of default value\n");
+
+	priv->hs400_timing = SDMMC_CLKSEL_TIMING(timing[0], timing[1],
+						HS400_FIXED_CIU_CLK_DIV);
 	host->priv = priv;
 	return 0;
 }
@@ -285,7 +388,7 @@
 		clksel = mci_readl(host, CLKSEL64);
 	else
 		clksel = mci_readl(host, CLKSEL);
-	clksel = (clksel & ~0x7) | SDMMC_CLKSEL_CCLK_SAMPLE(sample);
+	clksel = SDMMC_CLKSEL_UP_SAMPLE(clksel, sample);
 	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
 		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
 		mci_writel(host, CLKSEL64, clksel);
@@ -304,13 +407,16 @@
 		clksel = mci_readl(host, CLKSEL64);
 	else
 		clksel = mci_readl(host, CLKSEL);
+
 	sample = (clksel + 1) & 0x7;
-	clksel = (clksel & ~0x7) | sample;
+	clksel = SDMMC_CLKSEL_UP_SAMPLE(clksel, sample);
+
 	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
 		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
 		mci_writel(host, CLKSEL64, clksel);
 	else
 		mci_writel(host, CLKSEL, clksel);
+
 	return sample;
 }
 
@@ -343,6 +449,7 @@
 static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot)
 {
 	struct dw_mci *host = slot->host;
+	struct dw_mci_exynos_priv_data *priv = host->priv;
 	struct mmc_host *mmc = slot->mmc;
 	u8 start_smpl, smpl, candiates = 0;
 	s8 found = -1;
@@ -360,14 +467,27 @@
 	} while (start_smpl != smpl);
 
 	found = dw_mci_exynos_get_best_clksmpl(candiates);
-	if (found >= 0)
+	if (found >= 0) {
 		dw_mci_exynos_set_clksmpl(host, found);
-	else
+		priv->tuned_sample = found;
+	} else {
 		ret = -EIO;
+	}
 
 	return ret;
 }
 
+static int dw_mci_exynos_prepare_hs400_tuning(struct dw_mci *host,
+					struct mmc_ios *ios)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+
+	dw_mci_exynos_set_clksel_timing(host, priv->hs400_timing);
+	dw_mci_exynos_adjust_clock(host, (ios->clock) << 1);
+
+	return 0;
+}
+
 /* Common capabilities of Exynos4/Exynos5 SoC */
 static unsigned long exynos_dwmmc_caps[4] = {
 	MMC_CAP_1_8V_DDR | MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
@@ -384,6 +504,7 @@
 	.set_ios		= dw_mci_exynos_set_ios,
 	.parse_dt		= dw_mci_exynos_parse_dt,
 	.execute_tuning		= dw_mci_exynos_execute_tuning,
+	.prepare_hs400_tuning	= dw_mci_exynos_prepare_hs400_tuning,
 };
 
 static const struct of_device_id dw_mci_exynos_match[] = {
diff --git a/drivers/mmc/host/dw_mmc-exynos.h b/drivers/mmc/host/dw_mmc-exynos.h
index 7872ce5..595c934 100644
--- a/drivers/mmc/host/dw_mmc-exynos.h
+++ b/drivers/mmc/host/dw_mmc-exynos.h
@@ -12,20 +12,36 @@
 #ifndef _DW_MMC_EXYNOS_H_
 #define _DW_MMC_EXYNOS_H_
 
-/* Extended Register's Offset */
 #define SDMMC_CLKSEL			0x09C
 #define SDMMC_CLKSEL64			0x0A8
 
+/* Extended Register's Offset */
+#define SDMMC_HS400_DQS_EN		0x180
+#define SDMMC_HS400_ASYNC_FIFO_CTRL	0x184
+#define SDMMC_HS400_DLINE_CTRL		0x188
+
 /* CLKSEL register defines */
 #define SDMMC_CLKSEL_CCLK_SAMPLE(x)	(((x) & 7) << 0)
 #define SDMMC_CLKSEL_CCLK_DRIVE(x)	(((x) & 7) << 16)
 #define SDMMC_CLKSEL_CCLK_DIVIDER(x)	(((x) & 7) << 24)
 #define SDMMC_CLKSEL_GET_DRV_WD3(x)	(((x) >> 16) & 0x7)
+#define SDMMC_CLKSEL_GET_DIV(x)		(((x) >> 24) & 0x7)
+#define SDMMC_CLKSEL_UP_SAMPLE(x, y)	(((x) & ~SDMMC_CLKSEL_CCLK_SAMPLE(7)) |\
+					 SDMMC_CLKSEL_CCLK_SAMPLE(y))
 #define SDMMC_CLKSEL_TIMING(x, y, z)	(SDMMC_CLKSEL_CCLK_SAMPLE(x) |	\
 					 SDMMC_CLKSEL_CCLK_DRIVE(y) |	\
 					 SDMMC_CLKSEL_CCLK_DIVIDER(z))
+#define SDMMC_CLKSEL_TIMING_MASK	SDMMC_CLKSEL_TIMING(0x7, 0x7, 0x7)
 #define SDMMC_CLKSEL_WAKEUP_INT		BIT(11)
 
+/* RCLK_EN register defines */
+#define DATA_STROBE_EN			BIT(0)
+#define AXI_NON_BLOCKING_WR	BIT(7)
+
+/* DLINE_CTRL register defines */
+#define DQS_CTRL_RD_DELAY(x, y)		(((x) & ~0x3FF) | ((y) & 0x3FF))
+#define DQS_CTRL_GET_RD_DELAY(x)	((x) & 0x3FF)
+
 /* Protector Register */
 #define SDMMC_EMMCP_BASE	0x1000
 #define SDMMC_MPSECURITY	(SDMMC_EMMCP_BASE + 0x0010)
@@ -49,6 +65,7 @@
 /* Fixed clock divider */
 #define EXYNOS4210_FIXED_CIU_CLK_DIV	2
 #define EXYNOS4412_FIXED_CIU_CLK_DIV	4
+#define HS400_FIXED_CIU_CLK_DIV		1
 
 /* Minimal required clock frequency for cclkin, unit: HZ */
 #define EXYNOS_CCLKIN_MIN	50000000
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index e2a726a..dbf166f 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -76,12 +76,20 @@
 	return 0;
 }
 
+/* Common capabilities of RK3288 SoC */
+static unsigned long dw_mci_rk3288_dwmmc_caps[4] = {
+	MMC_CAP_RUNTIME_RESUME, /* emmc */
+	MMC_CAP_RUNTIME_RESUME, /* sdmmc */
+	MMC_CAP_RUNTIME_RESUME, /* sdio0 */
+	MMC_CAP_RUNTIME_RESUME, /* sdio1 */
+};
 static const struct dw_mci_drv_data rk2928_drv_data = {
 	.prepare_command        = dw_mci_rockchip_prepare_command,
 	.init			= dw_mci_rockchip_init,
 };
 
 static const struct dw_mci_drv_data rk3288_drv_data = {
+	.caps			= dw_mci_rk3288_dwmmc_caps,
 	.prepare_command        = dw_mci_rockchip_prepare_command,
 	.set_ios		= dw_mci_rk3288_set_ios,
 	.setup_clock    = dw_mci_rk3288_setup_clock,
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 4d2e3c2..38b2926 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -69,7 +69,8 @@
 
 	u32		des2;	/*Buffer sizes */
 #define IDMAC_64ADDR_SET_BUFFER1_SIZE(d, s) \
-	((d)->des2 = ((d)->des2 & 0x03ffe000) | ((s) & 0x1fff))
+	((d)->des2 = ((d)->des2 & cpu_to_le32(0x03ffe000)) | \
+	 ((cpu_to_le32(s)) & cpu_to_le32(0x1fff)))
 
 	u32		des3;	/* Reserved */
 
@@ -81,7 +82,7 @@
 };
 
 struct idmac_desc {
-	u32		des0;	/* Control Descriptor */
+	__le32		des0;	/* Control Descriptor */
 #define IDMAC_DES0_DIC	BIT(1)
 #define IDMAC_DES0_LD	BIT(2)
 #define IDMAC_DES0_FD	BIT(3)
@@ -90,18 +91,19 @@
 #define IDMAC_DES0_CES	BIT(30)
 #define IDMAC_DES0_OWN	BIT(31)
 
-	u32		des1;	/* Buffer sizes */
+	__le32		des1;	/* Buffer sizes */
 #define IDMAC_SET_BUFFER1_SIZE(d, s) \
 	((d)->des1 = ((d)->des1 & 0x03ffe000) | ((s) & 0x1fff))
 
-	u32		des2;	/* buffer 1 physical address */
+	__le32		des2;	/* buffer 1 physical address */
 
-	u32		des3;	/* buffer 2 physical address */
+	__le32		des3;	/* buffer 2 physical address */
 };
 #endif /* CONFIG_MMC_DW_IDMAC */
 
 static bool dw_mci_reset(struct dw_mci *host);
 static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset);
+static int dw_mci_card_busy(struct mmc_host *mmc);
 
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -335,6 +337,31 @@
 	return cmdr;
 }
 
+static void dw_mci_wait_while_busy(struct dw_mci *host, u32 cmd_flags)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(500);
+
+	/*
+	 * Databook says that before issuing a new data transfer command
+	 * we need to check to see if the card is busy.  Data transfer commands
+	 * all have SDMMC_CMD_PRV_DAT_WAIT set, so we'll key off that.
+	 *
+	 * ...also allow sending for SDMMC_CMD_VOLT_SWITCH where busy is
+	 * expected.
+	 */
+	if ((cmd_flags & SDMMC_CMD_PRV_DAT_WAIT) &&
+	    !(cmd_flags & SDMMC_CMD_VOLT_SWITCH)) {
+		while (mci_readl(host, STATUS) & SDMMC_STATUS_BUSY) {
+			if (time_after(jiffies, timeout)) {
+				/* Command will fail; we'll pass error then */
+				dev_err(host->dev, "Busy; trying anyway\n");
+				break;
+			}
+			udelay(10);
+		}
+	}
+}
+
 static void dw_mci_start_command(struct dw_mci *host,
 				 struct mmc_command *cmd, u32 cmd_flags)
 {
@@ -345,6 +372,7 @@
 
 	mci_writel(host, CMDARG, cmd->arg);
 	wmb();
+	dw_mci_wait_while_busy(host, cmd_flags);
 
 	mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
 }
@@ -477,23 +505,23 @@
 			 * Set the OWN bit and disable interrupts for this
 			 * descriptor
 			 */
-			desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC |
-						IDMAC_DES0_CH;
+			desc->des0 = cpu_to_le32(IDMAC_DES0_OWN |
+					IDMAC_DES0_DIC | IDMAC_DES0_CH);
 			/* Buffer length */
 			IDMAC_SET_BUFFER1_SIZE(desc, length);
 
 			/* Physical address to DMA to/from */
-			desc->des2 = mem_addr;
+			desc->des2 = cpu_to_le32(mem_addr);
 		}
 
 		/* Set first descriptor */
 		desc = host->sg_cpu;
-		desc->des0 |= IDMAC_DES0_FD;
+		desc->des0 |= cpu_to_le32(IDMAC_DES0_FD);
 
 		/* Set last descriptor */
 		desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc);
-		desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC);
-		desc->des0 |= IDMAC_DES0_LD;
+		desc->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | IDMAC_DES0_DIC));
+		desc->des0 |= cpu_to_le32(IDMAC_DES0_LD);
 	}
 
 	wmb();
@@ -562,12 +590,12 @@
 
 		/* Forward link the descriptor list */
 		for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
-			p->des3 = host->sg_dma + (sizeof(struct idmac_desc) *
-								(i + 1));
+			p->des3 = cpu_to_le32(host->sg_dma +
+					(sizeof(struct idmac_desc) * (i + 1)));
 
 		/* Set the last descriptor as the end-of-ring descriptor */
-		p->des3 = host->sg_dma;
-		p->des0 = IDMAC_DES0_ER;
+		p->des3 = cpu_to_le32(host->sg_dma);
+		p->des0 = cpu_to_le32(IDMAC_DES0_ER);
 	}
 
 	dw_mci_idmac_reset(host);
@@ -737,6 +765,7 @@
 		return;
 
 	if (host->timing != MMC_TIMING_MMC_HS200 &&
+	    host->timing != MMC_TIMING_MMC_HS400 &&
 	    host->timing != MMC_TIMING_UHS_SDR104)
 		goto disable;
 
@@ -876,6 +905,7 @@
 
 	mci_writel(host, CMDARG, arg);
 	wmb();
+	dw_mci_wait_while_busy(host, cmd);
 	mci_writel(host, CMD, SDMMC_CMD_START | cmd);
 
 	while (time_before(jiffies, timeout)) {
@@ -992,6 +1022,26 @@
 
 	dw_mci_start_command(host, cmd, cmdflags);
 
+	if (cmd->opcode == SD_SWITCH_VOLTAGE) {
+		unsigned long irqflags;
+
+		/*
+		 * Databook says to fail after 2ms w/ no response, but evidence
+		 * shows that sometimes the cmd11 interrupt takes over 130ms.
+		 * We'll set to 500ms, plus an extra jiffy just in case jiffies
+		 * is just about to roll over.
+		 *
+		 * We do this whole thing under spinlock and only if the
+		 * command hasn't already completed (indicating the the irq
+		 * already ran so we don't want the timeout).
+		 */
+		spin_lock_irqsave(&host->irq_lock, irqflags);
+		if (!test_bit(EVENT_CMD_COMPLETE, &host->pending_events))
+			mod_timer(&host->cmd11_timer,
+				jiffies + msecs_to_jiffies(500) + 1);
+		spin_unlock_irqrestore(&host->irq_lock, irqflags);
+	}
+
 	if (mrq->stop)
 		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
 	else
@@ -1084,7 +1134,8 @@
 	regs = mci_readl(slot->host, UHS_REG);
 
 	/* DDR mode set */
-	if (ios->timing == MMC_TIMING_MMC_DDR52)
+	if (ios->timing == MMC_TIMING_MMC_DDR52 ||
+	    ios->timing == MMC_TIMING_MMC_HS400)
 		regs |= ((0x1 << slot->id) << 16);
 	else
 		regs &= ~((0x1 << slot->id) << 16);
@@ -1101,12 +1152,6 @@
 	if (drv_data && drv_data->set_ios)
 		drv_data->set_ios(slot->host, ios);
 
-	/* Slot specific timing and width adjustment */
-	dw_mci_setup_bus(slot, false);
-
-	if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0)
-		slot->host->state = STATE_IDLE;
-
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		if (!IS_ERR(mmc->supply.vmmc)) {
@@ -1125,23 +1170,39 @@
 		mci_writel(slot->host, PWREN, regs);
 		break;
 	case MMC_POWER_ON:
-		if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) {
-			ret = regulator_enable(mmc->supply.vqmmc);
-			if (ret < 0)
-				dev_err(slot->host->dev,
-					"failed to enable vqmmc regulator\n");
-			else
+		if (!slot->host->vqmmc_enabled) {
+			if (!IS_ERR(mmc->supply.vqmmc)) {
+				ret = regulator_enable(mmc->supply.vqmmc);
+				if (ret < 0)
+					dev_err(slot->host->dev,
+						"failed to enable vqmmc\n");
+				else
+					slot->host->vqmmc_enabled = true;
+
+			} else {
+				/* Keep track so we don't reset again */
 				slot->host->vqmmc_enabled = true;
+			}
+
+			/* Reset our state machine after powering on */
+			dw_mci_ctrl_reset(slot->host,
+					  SDMMC_CTRL_ALL_RESET_FLAGS);
 		}
+
+		/* Adjust clock / bus width after power is up */
+		dw_mci_setup_bus(slot, false);
+
 		break;
 	case MMC_POWER_OFF:
+		/* Turn clock off before power goes down */
+		dw_mci_setup_bus(slot, false);
+
 		if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 
-		if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) {
+		if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled)
 			regulator_disable(mmc->supply.vqmmc);
-			slot->host->vqmmc_enabled = false;
-		}
+		slot->host->vqmmc_enabled = false;
 
 		regs = mci_readl(slot->host, PWREN);
 		regs &= ~(1 << slot->id);
@@ -1150,6 +1211,9 @@
 	default:
 		break;
 	}
+
+	if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0)
+		slot->host->state = STATE_IDLE;
 }
 
 static int dw_mci_card_busy(struct mmc_host *mmc)
@@ -1323,6 +1387,18 @@
 	return err;
 }
 
+static int dw_mci_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+	const struct dw_mci_drv_data *drv_data = host->drv_data;
+
+	if (drv_data && drv_data->prepare_hs400_tuning)
+		return drv_data->prepare_hs400_tuning(host, ios);
+
+	return 0;
+}
+
 static const struct mmc_host_ops dw_mci_ops = {
 	.request		= dw_mci_request,
 	.pre_req		= dw_mci_pre_req,
@@ -1335,6 +1411,7 @@
 	.card_busy		= dw_mci_card_busy,
 	.start_signal_voltage_switch = dw_mci_switch_voltage,
 	.init_card		= dw_mci_init_card,
+	.prepare_hs400_tuning	= dw_mci_prepare_hs400_tuning,
 };
 
 static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
@@ -1520,7 +1597,10 @@
 			if (test_and_clear_bit(EVENT_DATA_ERROR,
 					       &host->pending_events)) {
 				dw_mci_stop_dma(host);
-				send_stop_abort(host, data);
+				if (data->stop ||
+				    !(host->data_status & (SDMMC_INT_DRTO |
+							   SDMMC_INT_EBE)))
+					send_stop_abort(host, data);
 				state = STATE_DATA_ERROR;
 				break;
 			}
@@ -1547,7 +1627,10 @@
 			if (test_and_clear_bit(EVENT_DATA_ERROR,
 					       &host->pending_events)) {
 				dw_mci_stop_dma(host);
-				send_stop_abort(host, data);
+				if (data->stop ||
+				    !(host->data_status & (SDMMC_INT_DRTO |
+							   SDMMC_INT_EBE)))
+					send_stop_abort(host, data);
 				state = STATE_DATA_ERROR;
 				break;
 			}
@@ -1685,8 +1768,7 @@
 		buf += len;
 		cnt -= len;
 		if (host->part_buf_count == 2) {
-			mci_writew(host, DATA(host->data_offset),
-					host->part_buf16);
+			mci_fifo_writew(host->fifo_reg, host->part_buf16);
 			host->part_buf_count = 0;
 		}
 	}
@@ -1703,15 +1785,14 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writew(host, DATA(host->data_offset),
-						aligned_buf[i]);
+				mci_fifo_writew(host->fifo_reg, aligned_buf[i]);
 		}
 	} else
 #endif
 	{
 		u16 *pdata = buf;
 		for (; cnt >= 2; cnt -= 2)
-			mci_writew(host, DATA(host->data_offset), *pdata++);
+			mci_fifo_writew(host->fifo_reg, *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1720,8 +1801,7 @@
 		 /* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writew(host, DATA(host->data_offset),
-				   host->part_buf16);
+			mci_fifo_writew(host->fifo_reg, host->part_buf16);
 	}
 }
 
@@ -1736,8 +1816,7 @@
 			int items = len >> 1;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readw(host,
-						DATA(host->data_offset));
+				aligned_buf[i] = mci_fifo_readw(host->fifo_reg);
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
 			buf += len;
@@ -1748,11 +1827,11 @@
 	{
 		u16 *pdata = buf;
 		for (; cnt >= 2; cnt -= 2)
-			*pdata++ = mci_readw(host, DATA(host->data_offset));
+			*pdata++ = mci_fifo_readw(host->fifo_reg);
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf16 = mci_readw(host, DATA(host->data_offset));
+		host->part_buf16 = mci_fifo_readw(host->fifo_reg);
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
@@ -1768,8 +1847,7 @@
 		buf += len;
 		cnt -= len;
 		if (host->part_buf_count == 4) {
-			mci_writel(host, DATA(host->data_offset),
-					host->part_buf32);
+			mci_fifo_writel(host->fifo_reg,	host->part_buf32);
 			host->part_buf_count = 0;
 		}
 	}
@@ -1786,15 +1864,14 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writel(host, DATA(host->data_offset),
-						aligned_buf[i]);
+				mci_fifo_writel(host->fifo_reg,	aligned_buf[i]);
 		}
 	} else
 #endif
 	{
 		u32 *pdata = buf;
 		for (; cnt >= 4; cnt -= 4)
-			mci_writel(host, DATA(host->data_offset), *pdata++);
+			mci_fifo_writel(host->fifo_reg, *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1803,8 +1880,7 @@
 		 /* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writel(host, DATA(host->data_offset),
-				   host->part_buf32);
+			mci_fifo_writel(host->fifo_reg, host->part_buf32);
 	}
 }
 
@@ -1819,8 +1895,7 @@
 			int items = len >> 2;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readl(host,
-						DATA(host->data_offset));
+				aligned_buf[i] = mci_fifo_readl(host->fifo_reg);
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
 			buf += len;
@@ -1831,11 +1906,11 @@
 	{
 		u32 *pdata = buf;
 		for (; cnt >= 4; cnt -= 4)
-			*pdata++ = mci_readl(host, DATA(host->data_offset));
+			*pdata++ = mci_fifo_readl(host->fifo_reg);
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf32 = mci_readl(host, DATA(host->data_offset));
+		host->part_buf32 = mci_fifo_readl(host->fifo_reg);
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
@@ -1852,8 +1927,7 @@
 		cnt -= len;
 
 		if (host->part_buf_count == 8) {
-			mci_writeq(host, DATA(host->data_offset),
-					host->part_buf);
+			mci_fifo_writeq(host->fifo_reg,	host->part_buf);
 			host->part_buf_count = 0;
 		}
 	}
@@ -1870,15 +1944,14 @@
 			cnt -= len;
 			/* push data from aligned buffer into fifo */
 			for (i = 0; i < items; ++i)
-				mci_writeq(host, DATA(host->data_offset),
-						aligned_buf[i]);
+				mci_fifo_writeq(host->fifo_reg,	aligned_buf[i]);
 		}
 	} else
 #endif
 	{
 		u64 *pdata = buf;
 		for (; cnt >= 8; cnt -= 8)
-			mci_writeq(host, DATA(host->data_offset), *pdata++);
+			mci_fifo_writeq(host->fifo_reg, *pdata++);
 		buf = pdata;
 	}
 	/* put anything remaining in the part_buf */
@@ -1887,8 +1960,7 @@
 		/* Push data if we have reached the expected data length */
 		if ((data->bytes_xfered + init_cnt) ==
 		    (data->blksz * data->blocks))
-			mci_writeq(host, DATA(host->data_offset),
-				   host->part_buf);
+			mci_fifo_writeq(host->fifo_reg, host->part_buf);
 	}
 }
 
@@ -1903,8 +1975,8 @@
 			int items = len >> 3;
 			int i;
 			for (i = 0; i < items; ++i)
-				aligned_buf[i] = mci_readq(host,
-						DATA(host->data_offset));
+				aligned_buf[i] = mci_fifo_readq(host->fifo_reg);
+
 			/* memcpy from aligned buffer into output buffer */
 			memcpy(buf, aligned_buf, len);
 			buf += len;
@@ -1915,11 +1987,11 @@
 	{
 		u64 *pdata = buf;
 		for (; cnt >= 8; cnt -= 8)
-			*pdata++ = mci_readq(host, DATA(host->data_offset));
+			*pdata++ = mci_fifo_readq(host->fifo_reg);
 		buf = pdata;
 	}
 	if (cnt) {
-		host->part_buf = mci_readq(host, DATA(host->data_offset));
+		host->part_buf = mci_fifo_readq(host->fifo_reg);
 		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
@@ -2097,9 +2169,20 @@
 		/* Check volt switch first, since it can look like an error */
 		if ((host->state == STATE_SENDING_CMD11) &&
 		    (pending & SDMMC_INT_VOLT_SWITCH)) {
+			unsigned long irqflags;
+
 			mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH);
 			pending &= ~SDMMC_INT_VOLT_SWITCH;
+
+			/*
+			 * Hold the lock; we know cmd11_timer can't be kicked
+			 * off after the lock is released, so safe to delete.
+			 */
+			spin_lock_irqsave(&host->irq_lock, irqflags);
 			dw_mci_cmd_interrupt(host, pending);
+			spin_unlock_irqrestore(&host->irq_lock, irqflags);
+
+			del_timer(&host->cmd11_timer);
 		}
 
 		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
@@ -2156,6 +2239,10 @@
 		/* Handle SDIO Interrupts */
 		for (i = 0; i < host->num_slots; i++) {
 			struct dw_mci_slot *slot = host->slot[i];
+
+			if (!slot)
+				continue;
+
 			if (pending & SDMMC_INT_SDIO(slot->sdio_id)) {
 				mci_writel(host, RINTSTS,
 					   SDMMC_INT_SDIO(slot->sdio_id));
@@ -2506,6 +2593,20 @@
 	return ret;
 }
 
+static void dw_mci_cmd11_timer(unsigned long arg)
+{
+	struct dw_mci *host = (struct dw_mci *)arg;
+
+	if (host->state != STATE_SENDING_CMD11) {
+		dev_warn(host->dev, "Unexpected CMD11 timeout\n");
+		return;
+	}
+
+	host->cmd_status = SDMMC_INT_RTO;
+	set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+	tasklet_schedule(&host->tasklet);
+}
+
 #ifdef CONFIG_OF
 static struct dw_mci_of_quirks {
 	char *quirk;
@@ -2574,6 +2675,34 @@
 }
 #endif /* CONFIG_OF */
 
+static void dw_mci_enable_cd(struct dw_mci *host)
+{
+	struct dw_mci_board *brd = host->pdata;
+	unsigned long irqflags;
+	u32 temp;
+	int i;
+
+	/* No need for CD if broken card detection */
+	if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
+		return;
+
+	/* No need for CD if all slots have a non-error GPIO */
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+
+		if (IS_ERR_VALUE(mmc_gpio_get_cd(slot->mmc)))
+			break;
+	}
+	if (i == host->num_slots)
+		return;
+
+	spin_lock_irqsave(&host->irq_lock, irqflags);
+	temp = mci_readl(host, INTMASK);
+	temp  |= SDMMC_INT_CD;
+	mci_writel(host, INTMASK, temp);
+	spin_unlock_irqrestore(&host->irq_lock, irqflags);
+}
+
 int dw_mci_probe(struct dw_mci *host)
 {
 	const struct dw_mci_drv_data *drv_data = host->drv_data;
@@ -2652,6 +2781,9 @@
 		}
 	}
 
+	setup_timer(&host->cmd11_timer,
+		    dw_mci_cmd11_timer, (unsigned long)host);
+
 	host->quirks = host->pdata->quirks;
 
 	spin_lock_init(&host->lock);
@@ -2731,9 +2863,9 @@
 	dev_info(host->dev, "Version ID is %04x\n", host->verid);
 
 	if (host->verid < DW_MMC_240A)
-		host->data_offset = DATA_OFFSET;
+		host->fifo_reg = host->regs + DATA_OFFSET;
 	else
-		host->data_offset = DATA_240A_OFFSET;
+		host->fifo_reg = host->regs + DATA_240A_OFFSET;
 
 	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
 	ret = devm_request_irq(host->dev, host->irq, dw_mci_interrupt,
@@ -2747,13 +2879,13 @@
 		host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1;
 
 	/*
-	 * Enable interrupts for command done, data over, data empty, card det,
+	 * Enable interrupts for command done, data over, data empty,
 	 * receive ready and error such as transmit, receive timeout, crc error
 	 */
 	mci_writel(host, RINTSTS, 0xFFFFFFFF);
 	mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
 		   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
-		   DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+		   DW_MCI_ERROR_FLAGS);
 	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
 
 	dev_info(host->dev, "DW MMC controller at irq %d, "
@@ -2778,6 +2910,9 @@
 		goto err_dmaunmap;
 	}
 
+	/* Now that slots are all setup, we can enable card detect */
+	dw_mci_enable_cd(host);
+
 	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
 		dev_info(host->dev, "Internal DMAC interrupt fix enabled.\n");
 
@@ -2864,7 +2999,7 @@
 	mci_writel(host, RINTSTS, 0xFFFFFFFF);
 	mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
 		   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
-		   DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+		   DW_MCI_ERROR_FLAGS);
 	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE);
 
 	for (i = 0; i < host->num_slots; i++) {
@@ -2876,6 +3011,10 @@
 			dw_mci_setup_bus(slot, true);
 		}
 	}
+
+	/* Now that slots are all setup, we can enable card detect */
+	dw_mci_enable_cd(host);
+
 	return 0;
 }
 EXPORT_SYMBOL(dw_mci_resume);
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 18c4afe..f45ab91 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -169,24 +169,34 @@
 #define SDMMC_CTRL_ALL_RESET_FLAGS \
 	(SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
 
+/* FIFO register access macros. These should not change the data endian-ness
+ * as they are written to memory to be dealt with by the upper layers */
+#define mci_fifo_readw(__reg)	__raw_readw(__reg)
+#define mci_fifo_readl(__reg)	__raw_readl(__reg)
+#define mci_fifo_readq(__reg)	__raw_readq(__reg)
+
+#define mci_fifo_writew(__value, __reg)	__raw_writew(__reg, __value)
+#define mci_fifo_writel(__value, __reg)	__raw_writel(__reg, __value)
+#define mci_fifo_writeq(__value, __reg)	__raw_writeq(__reg, __value)
+
 /* Register access macros */
 #define mci_readl(dev, reg)			\
-	__raw_readl((dev)->regs + SDMMC_##reg)
+	readl_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writel(dev, reg, value)			\
-	__raw_writel((value), (dev)->regs + SDMMC_##reg)
+	writel_relaxed((value), (dev)->regs + SDMMC_##reg)
 
 /* 16-bit FIFO access macros */
 #define mci_readw(dev, reg)			\
-	__raw_readw((dev)->regs + SDMMC_##reg)
+	readw_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writew(dev, reg, value)			\
-	__raw_writew((value), (dev)->regs + SDMMC_##reg)
+	writew_relaxed((value), (dev)->regs + SDMMC_##reg)
 
 /* 64-bit FIFO access macros */
 #ifdef readq
 #define mci_readq(dev, reg)			\
-	__raw_readq((dev)->regs + SDMMC_##reg)
+	readq_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writeq(dev, reg, value)			\
-	__raw_writeq((value), (dev)->regs + SDMMC_##reg)
+	writeq_relaxed((value), (dev)->regs + SDMMC_##reg)
 #else
 /*
  * Dummy readq implementation for architectures that don't define it.
@@ -200,6 +210,10 @@
 	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
 #define mci_writeq(dev, reg, value)			\
 	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
+
+#define __raw_writeq(__value, __reg) \
+	(*(volatile u64 __force *)(__reg) = (__value))
+#define __raw_readq(__reg) (*(volatile u64 __force *)(__reg))
 #endif
 
 extern int dw_mci_probe(struct dw_mci *host);
@@ -271,5 +285,7 @@
 	void		(*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
 	int		(*parse_dt)(struct dw_mci *host);
 	int		(*execute_tuning)(struct dw_mci_slot *slot);
+	int		(*prepare_hs400_tuning)(struct dw_mci *host,
+						struct mmc_ios *ios);
 };
 #endif /* _DW_MMC_H_ */
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index e4a0754..ae19d83 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1507,7 +1507,7 @@
 	return 0;
 }
 
-static struct of_device_id mmc_spi_of_match_table[] = {
+static const struct of_device_id mmc_spi_of_match_table[] = {
 	{ .compatible = "mmc-spi-slot", },
 	{},
 };
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7fe1619..fb26674 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1613,7 +1613,10 @@
 	dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
 
 	/* Get regulators and the supported OCR mask */
-	mmc_regulator_get_supply(mmc);
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		goto clk_disable;
+
 	if (!mmc->ocr_avail)
 		mmc->ocr_avail = plat->ocr_mask;
 	else if (plat->ocr_mask)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f84cfb0..9df2b68 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -222,10 +222,6 @@
 	struct omap_hsmmc_next	next_data;
 	struct	omap_hsmmc_platform_data	*pdata;
 
-	/* To handle board related suspend/resume functionality for MMC */
-	int (*suspend)(struct device *dev);
-	int (*resume)(struct device *dev);
-
 	/* return MMC cover switch state, can be NULL if not supported.
 	 *
 	 * possible return values:
@@ -234,12 +230,7 @@
 	 */
 	int (*get_cover_state)(struct device *dev);
 
-	/* Card detection IRQs */
-	int card_detect_irq;
-
 	int (*card_detect)(struct device *dev);
-	int (*get_ro)(struct device *dev);
-
 };
 
 struct omap_mmc_of_data {
@@ -256,13 +247,6 @@
 	return mmc_gpio_get_cd(host->mmc);
 }
 
-static int omap_hsmmc_get_wp(struct device *dev)
-{
-	struct omap_hsmmc_host *host = dev_get_drvdata(dev);
-
-	return mmc_gpio_get_ro(host->mmc);
-}
-
 static int omap_hsmmc_get_cover_state(struct device *dev)
 {
 	struct omap_hsmmc_host *host = dev_get_drvdata(dev);
@@ -434,7 +418,7 @@
 
 #endif
 
-static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id);
+static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id);
 
 static int omap_hsmmc_gpio_init(struct mmc_host *mmc,
 				struct omap_hsmmc_host *host,
@@ -442,29 +426,25 @@
 {
 	int ret;
 
-	if (gpio_is_valid(pdata->switch_pin)) {
-		if (pdata->cover)
-			host->get_cover_state =
-				omap_hsmmc_get_cover_state;
-		else
-			host->card_detect = omap_hsmmc_card_detect;
-		host->card_detect_irq =
-				gpio_to_irq(pdata->switch_pin);
-		mmc_gpio_set_cd_isr(mmc, omap_hsmmc_detect);
-		ret = mmc_gpio_request_cd(mmc, pdata->switch_pin, 0);
+	if (gpio_is_valid(pdata->gpio_cod)) {
+		ret = mmc_gpio_request_cd(mmc, pdata->gpio_cod, 0);
 		if (ret)
 			return ret;
-	} else {
-		pdata->switch_pin = -EINVAL;
+
+		host->get_cover_state = omap_hsmmc_get_cover_state;
+		mmc_gpio_set_cd_isr(mmc, omap_hsmmc_cover_irq);
+	} else if (gpio_is_valid(pdata->gpio_cd)) {
+		ret = mmc_gpio_request_cd(mmc, pdata->gpio_cd, 0);
+		if (ret)
+			return ret;
+
+		host->card_detect = omap_hsmmc_card_detect;
 	}
 
 	if (gpio_is_valid(pdata->gpio_wp)) {
-		host->get_ro = omap_hsmmc_get_wp;
 		ret = mmc_gpio_request_ro(mmc, pdata->gpio_wp);
 		if (ret)
 			return ret;
-	} else {
-		pdata->gpio_wp = -EINVAL;
 	}
 
 	return 0;
@@ -882,6 +862,8 @@
 		return;
 	host->mrq = NULL;
 	mmc_request_done(host->mmc, mrq);
+	pm_runtime_mark_last_busy(host->dev);
+	pm_runtime_put_autosuspend(host->dev);
 }
 
 /*
@@ -1252,26 +1234,16 @@
 }
 
 /*
- * irq handler to notify the core about card insertion/removal
+ * irq handler when (cell-phone) cover is mounted/removed
  */
-static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id)
+static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id)
 {
 	struct omap_hsmmc_host *host = dev_id;
-	int carddetect;
 
 	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
 
-	if (host->card_detect)
-		carddetect = host->card_detect(host->dev);
-	else {
-		omap_hsmmc_protect_card(host);
-		carddetect = -ENOSYS;
-	}
-
-	if (carddetect)
-		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
-	else
-		mmc_detect_change(host->mmc, (HZ * 50) / 1000);
+	omap_hsmmc_protect_card(host);
+	mmc_detect_change(host->mmc, (HZ * 200) / 1000);
 	return IRQ_HANDLED;
 }
 
@@ -1305,6 +1277,8 @@
 
 		host->mrq = NULL;
 		mmc_request_done(host->mmc, mrq);
+		pm_runtime_mark_last_busy(host->dev);
+		pm_runtime_put_autosuspend(host->dev);
 	}
 }
 
@@ -1537,6 +1511,7 @@
 
 	BUG_ON(host->req_in_progress);
 	BUG_ON(host->dma_ch != -1);
+	pm_runtime_get_sync(host->dev);
 	if (host->protect_card) {
 		if (host->reqs_blocked < 3) {
 			/*
@@ -1553,6 +1528,8 @@
 			req->data->error = -EBADF;
 		req->cmd->retries = 0;
 		mmc_request_done(mmc, req);
+		pm_runtime_mark_last_busy(host->dev);
+		pm_runtime_put_autosuspend(host->dev);
 		return;
 	} else if (host->reqs_blocked)
 		host->reqs_blocked = 0;
@@ -1566,6 +1543,8 @@
 			req->data->error = err;
 		host->mrq = NULL;
 		mmc_request_done(mmc, req);
+		pm_runtime_mark_last_busy(host->dev);
+		pm_runtime_put_autosuspend(host->dev);
 		return;
 	}
 	if (req->sbc && !(host->flags & AUTO_CMD23)) {
@@ -1641,15 +1620,6 @@
 	return host->card_detect(host->dev);
 }
 
-static int omap_hsmmc_get_ro(struct mmc_host *mmc)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	if (!host->get_ro)
-		return -ENOSYS;
-	return host->get_ro(host->dev);
-}
-
 static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
@@ -1778,25 +1748,6 @@
 	set_sd_bus_power(host);
 }
 
-static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	pm_runtime_get_sync(host->dev);
-
-	return 0;
-}
-
-static int omap_hsmmc_disable_fclk(struct mmc_host *mmc)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
-
-	return 0;
-}
-
 static int omap_hsmmc_multi_io_quirk(struct mmc_card *card,
 				     unsigned int direction, int blk_size)
 {
@@ -1808,14 +1759,12 @@
 }
 
 static struct mmc_host_ops omap_hsmmc_ops = {
-	.enable = omap_hsmmc_enable_fclk,
-	.disable = omap_hsmmc_disable_fclk,
 	.post_req = omap_hsmmc_post_req,
 	.pre_req = omap_hsmmc_pre_req,
 	.request = omap_hsmmc_request,
 	.set_ios = omap_hsmmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
-	.get_ro = omap_hsmmc_get_ro,
+	.get_ro = mmc_gpio_get_ro,
 	.init_card = omap_hsmmc_init_card,
 	.enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
 };
@@ -1937,7 +1886,8 @@
 	if (of_find_property(np, "ti,dual-volt", NULL))
 		pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
 
-	pdata->switch_pin = -EINVAL;
+	pdata->gpio_cd = -EINVAL;
+	pdata->gpio_cod = -EINVAL;
 	pdata->gpio_wp = -EINVAL;
 
 	if (of_find_property(np, "ti,non-removable", NULL)) {
@@ -2179,9 +2129,9 @@
 		if (ret < 0)
 			goto err_slot_name;
 	}
-	if (host->card_detect_irq && host->get_cover_state) {
+	if (host->get_cover_state) {
 		ret = device_create_file(&mmc->class_dev,
-					&dev_attr_cover_switch);
+					 &dev_attr_cover_switch);
 		if (ret < 0)
 			goto err_slot_name;
 	}
@@ -2236,7 +2186,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int omap_hsmmc_suspend(struct device *dev)
 {
 	struct omap_hsmmc_host *host = dev_get_drvdata(dev);
@@ -2292,10 +2242,6 @@
 	pm_runtime_put_autosuspend(host->dev);
 	return 0;
 }
-
-#else
-#define omap_hsmmc_suspend	NULL
-#define omap_hsmmc_resume	NULL
 #endif
 
 static int omap_hsmmc_runtime_suspend(struct device *dev)
@@ -2376,8 +2322,7 @@
 }
 
 static struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
-	.suspend	= omap_hsmmc_suspend,
-	.resume		= omap_hsmmc_resume,
+	SET_SYSTEM_SLEEP_PM_OPS(omap_hsmmc_suspend, omap_hsmmc_resume)
 	.runtime_suspend = omap_hsmmc_runtime_suspend,
 	.runtime_resume = omap_hsmmc_runtime_resume,
 };
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index a45ed39..22d929f 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -40,7 +40,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/pm.h>
 #include <linux/mmc/slot-gpio.h>
-#include <linux/mmc/sdhci.h>
 
 #include "sdhci.h"
 
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 34bb8f9..2bd90fb 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -54,7 +54,6 @@
 
 struct sdhci_bcm_kona_dev {
 	struct mutex	write_lock; /* protect back to back writes */
-	struct clk	*external_clk;
 };
 
 
@@ -175,24 +174,6 @@
 	}
 }
 
-/*
- * Get the base clock. Use central clock source for now. Not sure if different
- * clock speed to each dev is allowed
- */
-static unsigned int sdhci_bcm_kona_get_max_clk(struct sdhci_host *host)
-{
-	struct sdhci_bcm_kona_dev *kona_dev;
-	struct sdhci_pltfm_host *pltfm_priv = sdhci_priv(host);
-	kona_dev = sdhci_pltfm_priv(pltfm_priv);
-
-	return host->mmc->f_max;
-}
-
-static unsigned int sdhci_bcm_kona_get_timeout_clock(struct sdhci_host *host)
-{
-	return sdhci_bcm_kona_get_max_clk(host);
-}
-
 static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
 				u8 power_mode)
 {
@@ -207,8 +188,8 @@
 
 static struct sdhci_ops sdhci_bcm_kona_ops = {
 	.set_clock = sdhci_set_clock,
-	.get_max_clock = sdhci_bcm_kona_get_max_clk,
-	.get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
+	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+	.get_timeout_clock = sdhci_pltfm_clk_get_max_clock,
 	.platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
@@ -264,21 +245,21 @@
 		goto err_pltfm_free;
 	}
 
-	/* Get and enable the external clock */
-	kona_dev->external_clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(kona_dev->external_clk)) {
-		dev_err(dev, "Failed to get external clock\n");
-		ret = PTR_ERR(kona_dev->external_clk);
+	/* Get and enable the core clock */
+	pltfm_priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pltfm_priv->clk)) {
+		dev_err(dev, "Failed to get core clock\n");
+		ret = PTR_ERR(pltfm_priv->clk);
 		goto err_pltfm_free;
 	}
 
-	if (clk_set_rate(kona_dev->external_clk, host->mmc->f_max) != 0) {
-		dev_err(dev, "Failed to set rate external clock\n");
+	if (clk_set_rate(pltfm_priv->clk, host->mmc->f_max) != 0) {
+		dev_err(dev, "Failed to set rate core clock\n");
 		goto err_pltfm_free;
 	}
 
-	if (clk_prepare_enable(kona_dev->external_clk) != 0) {
-		dev_err(dev, "Failed to enable external clock\n");
+	if (clk_prepare_enable(pltfm_priv->clk) != 0) {
+		dev_err(dev, "Failed to enable core clock\n");
 		goto err_pltfm_free;
 	}
 
@@ -333,7 +314,7 @@
 	sdhci_bcm_kona_sd_reset(host);
 
 err_clk_disable:
-	clk_disable_unprepare(kona_dev->external_clk);
+	clk_disable_unprepare(pltfm_priv->clk);
 
 err_pltfm_free:
 	sdhci_pltfm_free(pdev);
@@ -342,22 +323,6 @@
 	return ret;
 }
 
-static int sdhci_bcm_kona_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_priv = sdhci_priv(host);
-	struct sdhci_bcm_kona_dev *kona_dev = sdhci_pltfm_priv(pltfm_priv);
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
-
-	clk_disable_unprepare(kona_dev->external_clk);
-
-	sdhci_pltfm_free(pdev);
-
-	return 0;
-}
-
 static struct platform_driver sdhci_bcm_kona_driver = {
 	.driver		= {
 		.name	= "sdhci-kona",
@@ -365,7 +330,7 @@
 		.of_match_table = sdhci_bcm_kona_of_match,
 	},
 	.probe		= sdhci_bcm_kona_probe,
-	.remove		= sdhci_bcm_kona_remove,
+	.remove		= sdhci_pltfm_unregister,
 };
 module_platform_driver(sdhci_bcm_kona_driver);
 
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index 439d259..0ef0343 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -180,11 +180,6 @@
 	return ret;
 }
 
-static int bcm2835_sdhci_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id bcm2835_sdhci_of_match[] = {
 	{ .compatible = "brcm,bcm2835-sdhci" },
 	{ }
@@ -198,7 +193,7 @@
 		.pm = SDHCI_PLTFM_PMOPS,
 	},
 	.probe = bcm2835_sdhci_probe,
-	.remove = bcm2835_sdhci_remove,
+	.remove = sdhci_pltfm_unregister,
 };
 module_platform_driver(bcm2835_sdhci_driver);
 
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index a7935a8..59f2923 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -98,18 +98,13 @@
 	return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata, 0);
 }
 
-static int sdhci_cns3xxx_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
 static struct platform_driver sdhci_cns3xxx_driver = {
 	.driver		= {
 		.name	= "sdhci-cns3xxx",
 		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_cns3xxx_probe,
-	.remove		= sdhci_cns3xxx_remove,
+	.remove		= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_cns3xxx_driver);
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index ca969d2..407c21f 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -28,10 +28,6 @@
 
 #include "sdhci-pltfm.h"
 
-struct sdhci_dove_priv {
-	struct clk *clk;
-};
-
 static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
 {
 	u16 ret;
@@ -84,27 +80,17 @@
 {
 	struct sdhci_host *host;
 	struct sdhci_pltfm_host *pltfm_host;
-	struct sdhci_dove_priv *priv;
 	int ret;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_dove_priv),
-			    GFP_KERNEL);
-	if (!priv) {
-		dev_err(&pdev->dev, "unable to allocate private data");
-		return -ENOMEM;
-	}
-
-	priv->clk = devm_clk_get(&pdev->dev, NULL);
-
 	host = sdhci_pltfm_init(pdev, &sdhci_dove_pdata, 0);
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = priv;
+	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 
-	if (!IS_ERR(priv->clk))
-		clk_prepare_enable(priv->clk);
+	if (!IS_ERR(pltfm_host->clk))
+		clk_prepare_enable(pltfm_host->clk);
 
 	ret = mmc_of_parse(host->mmc);
 	if (ret)
@@ -117,26 +103,11 @@
 	return 0;
 
 err_sdhci_add:
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(pltfm_host->clk);
 	sdhci_pltfm_free(pdev);
 	return ret;
 }
 
-static int sdhci_dove_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_dove_priv *priv = pltfm_host->priv;
-
-	sdhci_pltfm_unregister(pdev);
-
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
-
-	return 0;
-}
-
 static const struct of_device_id sdhci_dove_of_match_table[] = {
 	{ .compatible = "marvell,dove-sdhci", },
 	{}
@@ -150,7 +121,7 @@
 		.of_match_table = sdhci_dove_of_match_table,
 	},
 	.probe		= sdhci_dove_probe,
-	.remove		= sdhci_dove_remove,
+	.remove		= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_dove_driver);
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 10ef824..82f512d 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -416,7 +416,7 @@
 			new_val |= ESDHC_VENDOR_SPEC_FRC_SDCLK_ON;
 		else
 			new_val &= ~ESDHC_VENDOR_SPEC_FRC_SDCLK_ON;
-			writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
+		writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
 		return;
 	case SDHCI_HOST_CONTROL2:
 		new_val = readl(host->ioaddr + ESDHC_VENDOR_SPEC);
@@ -864,6 +864,7 @@
 #ifdef CONFIG_OF
 static int
 sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
+			 struct sdhci_host *host,
 			 struct esdhc_platform_data *boarddata)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -900,11 +901,14 @@
 	if (of_property_read_u32(np, "fsl,delay-line", &boarddata->delay_line))
 		boarddata->delay_line = 0;
 
+	mmc_of_parse_voltage(np, &host->ocr_mask);
+
 	return 0;
 }
 #else
 static inline int
 sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
+			 struct sdhci_host *host,
 			 struct esdhc_platform_data *boarddata)
 {
 	return -ENODEV;
@@ -999,7 +1003,7 @@
 			host->ioaddr + ESDHC_TUNING_CTRL);
 
 	boarddata = &imx_data->boarddata;
-	if (sdhci_esdhc_imx_probe_dt(pdev, boarddata) < 0) {
+	if (sdhci_esdhc_imx_probe_dt(pdev, host, boarddata) < 0) {
 		if (!host->mmc->parent->platform_data) {
 			dev_err(mmc_dev(host->mmc), "no board data!\n");
 			err = -EINVAL;
@@ -1009,40 +1013,9 @@
 					host->mmc->parent->platform_data);
 	}
 
-	/* write_protect */
-	if (boarddata->wp_type == ESDHC_WP_GPIO) {
-		err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio);
-		if (err) {
-			dev_err(mmc_dev(host->mmc),
-				"failed to request write-protect gpio!\n");
-			goto disable_clk;
-		}
-		host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
-	}
-
 	/* card_detect */
-	switch (boarddata->cd_type) {
-	case ESDHC_CD_GPIO:
-		err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0);
-		if (err) {
-			dev_err(mmc_dev(host->mmc),
-				"failed to request card-detect gpio!\n");
-			goto disable_clk;
-		}
-		/* fall through */
-
-	case ESDHC_CD_CONTROLLER:
-		/* we have a working card_detect back */
+	if (boarddata->cd_type == ESDHC_CD_CONTROLLER)
 		host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-		break;
-
-	case ESDHC_CD_PERMANENT:
-		host->mmc->caps |= MMC_CAP_NONREMOVABLE;
-		break;
-
-	case ESDHC_CD_NONE:
-		break;
-	}
 
 	switch (boarddata->max_bus_width) {
 	case 8:
@@ -1075,6 +1048,11 @@
 		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 	}
 
+	/* call to generic mmc_of_parse to support additional capabilities */
+	err = mmc_of_parse(host->mmc);
+	if (err)
+		goto disable_clk;
+
 	err = sdhci_add_host(host);
 	if (err)
 		goto disable_clk;
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
new file mode 100644
index 0000000..3b423b0
--- /dev/null
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * iProc SDHCI platform driver
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include "sdhci-pltfm.h"
+
+struct sdhci_iproc_data {
+	const struct sdhci_pltfm_data *pdata;
+	u32 caps;
+	u32 caps1;
+};
+
+struct sdhci_iproc_host {
+	const struct sdhci_iproc_data *data;
+	u32 shadow_cmd;
+	u32 shadow_blk;
+};
+
+#define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18)
+
+static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg)
+{
+	u32 val = readl(host->ioaddr + reg);
+
+	pr_debug("%s: readl [0x%02x] 0x%08x\n",
+		 mmc_hostname(host->mmc), reg, val);
+	return val;
+}
+
+static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg)
+{
+	u32 val = sdhci_iproc_readl(host, (reg & ~3));
+	u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
+	return word;
+}
+
+static u8 sdhci_iproc_readb(struct sdhci_host *host, int reg)
+{
+	u32 val = sdhci_iproc_readl(host, (reg & ~3));
+	u8 byte = val >> REG_OFFSET_IN_BITS(reg) & 0xff;
+	return byte;
+}
+
+static inline void sdhci_iproc_writel(struct sdhci_host *host, u32 val, int reg)
+{
+	pr_debug("%s: writel [0x%02x] 0x%08x\n",
+		 mmc_hostname(host->mmc), reg, val);
+
+	writel(val, host->ioaddr + reg);
+
+	if (host->clock <= 400000) {
+		/* Round up to micro-second four SD clock delay */
+		if (host->clock)
+			udelay((4 * 1000000 + host->clock - 1) / host->clock);
+		else
+			udelay(10);
+	}
+}
+
+/*
+ * The Arasan has a bugette whereby it may lose the content of successive
+ * writes to the same register that are within two SD-card clock cycles of
+ * each other (a clock domain crossing problem). The data
+ * register does not have this problem, which is just as well - otherwise we'd
+ * have to nobble the DMA engine too.
+ *
+ * This wouldn't be a problem with the code except that we can only write the
+ * controller with 32-bit writes.  So two different 16-bit registers are
+ * written back to back creates the problem.
+ *
+ * In reality, this only happens when SDHCI_BLOCK_SIZE and SDHCI_BLOCK_COUNT
+ * are written followed by SDHCI_TRANSFER_MODE and SDHCI_COMMAND.
+ * The BLOCK_SIZE and BLOCK_COUNT are meaningless until a command issued so
+ * the work around can be further optimized. We can keep shadow values of
+ * BLOCK_SIZE, BLOCK_COUNT, and TRANSFER_MODE until a COMMAND is issued.
+ * Then, write the BLOCK_SIZE+BLOCK_COUNT in a single 32-bit write followed
+ * by the TRANSFER+COMMAND in another 32-bit write.
+ */
+static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host);
+	u32 word_shift = REG_OFFSET_IN_BITS(reg);
+	u32 mask = 0xffff << word_shift;
+	u32 oldval, newval;
+
+	if (reg == SDHCI_COMMAND) {
+		/* Write the block now as we are issuing a command */
+		if (iproc_host->shadow_blk != 0) {
+			sdhci_iproc_writel(host, iproc_host->shadow_blk,
+				SDHCI_BLOCK_SIZE);
+			iproc_host->shadow_blk = 0;
+		}
+		oldval = iproc_host->shadow_cmd;
+	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+		/* Block size and count are stored in shadow reg */
+		oldval = iproc_host->shadow_blk;
+	} else {
+		/* Read reg, all other registers are not shadowed */
+		oldval = sdhci_iproc_readl(host, (reg & ~3));
+	}
+	newval = (oldval & ~mask) | (val << word_shift);
+
+	if (reg == SDHCI_TRANSFER_MODE) {
+		/* Save the transfer mode until the command is issued */
+		iproc_host->shadow_cmd = newval;
+	} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+		/* Save the block info until the command is issued */
+		iproc_host->shadow_blk = newval;
+	} else {
+		/* Command or other regular 32-bit write */
+		sdhci_iproc_writel(host, newval, reg & ~3);
+	}
+}
+
+static void sdhci_iproc_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+	u32 oldval = sdhci_iproc_readl(host, (reg & ~3));
+	u32 byte_shift = REG_OFFSET_IN_BITS(reg);
+	u32 mask = 0xff << byte_shift;
+	u32 newval = (oldval & ~mask) | (val << byte_shift);
+
+	sdhci_iproc_writel(host, newval, reg & ~3);
+}
+
+static const struct sdhci_ops sdhci_iproc_ops = {
+	.read_l = sdhci_iproc_readl,
+	.read_w = sdhci_iproc_readw,
+	.read_b = sdhci_iproc_readb,
+	.write_l = sdhci_iproc_writel,
+	.write_w = sdhci_iproc_writew,
+	.write_b = sdhci_iproc_writeb,
+	.set_clock = sdhci_set_clock,
+	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
+};
+
+static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
+	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
+	.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
+	.ops = &sdhci_iproc_ops,
+};
+
+static const struct sdhci_iproc_data iproc_data = {
+	.pdata = &sdhci_iproc_pltfm_data,
+	.caps = 0x05E90000,
+	.caps1 = 0x00000064,
+};
+
+static const struct of_device_id sdhci_iproc_of_match[] = {
+	{ .compatible = "brcm,sdhci-iproc-cygnus", .data = &iproc_data },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sdhci_iproc_of_match);
+
+static int sdhci_iproc_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	const struct sdhci_iproc_data *iproc_data;
+	struct sdhci_host *host;
+	struct sdhci_iproc_host *iproc_host;
+	struct sdhci_pltfm_host *pltfm_host;
+	int ret;
+
+	match = of_match_device(sdhci_iproc_of_match, &pdev->dev);
+	if (!match)
+		return -EINVAL;
+	iproc_data = match->data;
+
+	host = sdhci_pltfm_init(pdev, iproc_data->pdata, sizeof(*iproc_host));
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+	iproc_host = sdhci_pltfm_priv(pltfm_host);
+
+	iproc_host->data = iproc_data;
+
+	mmc_of_parse(host->mmc);
+	sdhci_get_of_property(pdev);
+
+	/* Enable EMMC 1/8V DDR capable */
+	host->mmc->caps |= MMC_CAP_1_8V_DDR;
+
+	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pltfm_host->clk)) {
+		ret = PTR_ERR(pltfm_host->clk);
+		goto err;
+	}
+
+	if (iproc_host->data->pdata->quirks & SDHCI_QUIRK_MISSING_CAPS) {
+		host->caps = iproc_host->data->caps;
+		host->caps1 = iproc_host->data->caps1;
+	}
+
+	return sdhci_add_host(host);
+
+err:
+	sdhci_pltfm_free(pdev);
+	return ret;
+}
+
+static int sdhci_iproc_remove(struct platform_device *pdev)
+{
+	return sdhci_pltfm_unregister(pdev);
+}
+
+static struct platform_driver sdhci_iproc_driver = {
+	.driver = {
+		.name = "sdhci-iproc",
+		.of_match_table = sdhci_iproc_of_match,
+		.pm = SDHCI_PLTFM_PMOPS,
+	},
+	.probe = sdhci_iproc_probe,
+	.remove = sdhci_iproc_remove,
+};
+module_platform_driver(sdhci_iproc_driver);
+
+MODULE_AUTHOR("Broadcom");
+MODULE_DESCRIPTION("IPROC SDHCI driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 3d32ce8..4a09f76 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -22,6 +22,11 @@
 
 #include "sdhci-pltfm.h"
 
+#define CORE_MCI_VERSION		0x50
+#define CORE_VERSION_MAJOR_SHIFT	28
+#define CORE_VERSION_MAJOR_MASK		(0xf << CORE_VERSION_MAJOR_SHIFT)
+#define CORE_VERSION_MINOR_MASK		0xff
+
 #define CORE_HC_MODE		0x78
 #define HC_MODE_EN		0x1
 #define CORE_POWER		0x0
@@ -41,6 +46,8 @@
 #define CORE_VENDOR_SPEC	0x10c
 #define CORE_CLK_PWRSAVE	BIT(1)
 
+#define CORE_VENDOR_SPEC_CAPABILITIES0	0x11c
+
 #define CDR_SELEXT_SHIFT	20
 #define CDR_SELEXT_MASK		(0xf << CDR_SELEXT_SHIFT)
 #define CMUX_SHIFT_PHASE_SHIFT	24
@@ -426,7 +433,9 @@
 	struct sdhci_msm_host *msm_host;
 	struct resource *core_memres;
 	int ret;
-	u16 host_version;
+	u16 host_version, core_minor;
+	u32 core_version, caps;
+	u8 core_major;
 
 	msm_host = devm_kzalloc(&pdev->dev, sizeof(*msm_host), GFP_KERNEL);
 	if (!msm_host)
@@ -516,6 +525,24 @@
 		host_version, ((host_version & SDHCI_VENDOR_VER_MASK) >>
 			       SDHCI_VENDOR_VER_SHIFT));
 
+	core_version = readl_relaxed(msm_host->core_mem + CORE_MCI_VERSION);
+	core_major = (core_version & CORE_VERSION_MAJOR_MASK) >>
+		      CORE_VERSION_MAJOR_SHIFT;
+	core_minor = core_version & CORE_VERSION_MINOR_MASK;
+	dev_dbg(&pdev->dev, "MCI Version: 0x%08x, major: 0x%04x, minor: 0x%02x\n",
+		core_version, core_major, core_minor);
+
+	/*
+	 * Support for some capabilities is not advertised by newer
+	 * controller versions and must be explicitly enabled.
+	 */
+	if (core_major >= 1 && core_minor != 0x11 && core_minor != 0x12) {
+		caps = readl_relaxed(host->ioaddr + SDHCI_CAPABILITIES);
+		caps |= SDHCI_CAN_VDD_300 | SDHCI_CAN_DO_8BIT;
+		writel_relaxed(caps, host->ioaddr +
+			       CORE_VENDOR_SPEC_CAPABILITIES0);
+	}
+
 	ret = sdhci_add_host(host);
 	if (ret)
 		goto clk_disable;
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index bcb51e9..6287d42 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -173,6 +173,12 @@
 	pltfm_host->priv = sdhci_arasan;
 	pltfm_host->clk = clk_xin;
 
+	ret = mmc_of_parse(host->mmc);
+	if (ret) {
+		dev_err(&pdev->dev, "parsing dt failed (%u)\n", ret);
+		goto clk_disable_all;
+	}
+
 	ret = sdhci_add_host(host);
 	if (ret)
 		goto err_pltfm_free;
@@ -195,7 +201,6 @@
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_arasan_data *sdhci_arasan = pltfm_host->priv;
 
-	clk_disable_unprepare(pltfm_host->clk);
 	clk_disable_unprepare(sdhci_arasan->clk_ahb);
 
 	return sdhci_pltfm_unregister(pdev);
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 17fe02e..22e9111 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -386,11 +386,6 @@
 	return ret;
 }
 
-static int sdhci_esdhc_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id sdhci_esdhc_of_match[] = {
 	{ .compatible = "fsl,mpc8379-esdhc" },
 	{ .compatible = "fsl,mpc8536-esdhc" },
@@ -406,7 +401,7 @@
 		.pm = ESDHC_PMOPS,
 	},
 	.probe = sdhci_esdhc_probe,
-	.remove = sdhci_esdhc_remove,
+	.remove = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_esdhc_driver);
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index be47927..4079a96 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -75,11 +75,6 @@
 	return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata, 0);
 }
 
-static int sdhci_hlwd_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id sdhci_hlwd_of_match[] = {
 	{ .compatible = "nintendo,hollywood-sdhci" },
 	{ }
@@ -93,7 +88,7 @@
 		.pm = SDHCI_PLTFM_PMOPS,
 	},
 	.probe = sdhci_hlwd_probe,
-	.remove = sdhci_hlwd_remove,
+	.remove = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_hlwd_driver);
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 29eaff7..7a3fc16 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -650,6 +650,7 @@
 
 static const struct sdhci_pci_fixes sdhci_rtsx = {
 	.quirks2	= SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+			SDHCI_QUIRK2_BROKEN_64_BIT_DMA |
 			SDHCI_QUIRK2_BROKEN_DDR50,
 	.probe_slot	= rtsx_probe_slot,
 };
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index c5b01d6..a207f5a 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -75,43 +75,41 @@
 	u32 bus_width;
 	int size;
 
-	if (of_device_is_available(np)) {
-		if (of_get_property(np, "sdhci,auto-cmd12", NULL))
-			host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
+	if (of_get_property(np, "sdhci,auto-cmd12", NULL))
+		host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
 
-		if (of_get_property(np, "sdhci,1-bit-only", NULL) ||
-		    (of_property_read_u32(np, "bus-width", &bus_width) == 0 &&
-		    bus_width == 1))
-			host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+	if (of_get_property(np, "sdhci,1-bit-only", NULL) ||
+	    (of_property_read_u32(np, "bus-width", &bus_width) == 0 &&
+	    bus_width == 1))
+		host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
 
-		if (sdhci_of_wp_inverted(np))
-			host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+	if (sdhci_of_wp_inverted(np))
+		host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
 
-		if (of_get_property(np, "broken-cd", NULL))
-			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+	if (of_get_property(np, "broken-cd", NULL))
+		host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 
-		if (of_get_property(np, "no-1-8-v", NULL))
-			host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
+	if (of_get_property(np, "no-1-8-v", NULL))
+		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 
-		if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc"))
-			host->quirks |= SDHCI_QUIRK_BROKEN_DMA;
+	if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc"))
+		host->quirks |= SDHCI_QUIRK_BROKEN_DMA;
 
-		if (of_device_is_compatible(np, "fsl,p2020-esdhc") ||
-		    of_device_is_compatible(np, "fsl,p1010-esdhc") ||
-		    of_device_is_compatible(np, "fsl,t4240-esdhc") ||
-		    of_device_is_compatible(np, "fsl,mpc8536-esdhc"))
-			host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+	if (of_device_is_compatible(np, "fsl,p2020-esdhc") ||
+	    of_device_is_compatible(np, "fsl,p1010-esdhc") ||
+	    of_device_is_compatible(np, "fsl,t4240-esdhc") ||
+	    of_device_is_compatible(np, "fsl,mpc8536-esdhc"))
+		host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
-		clk = of_get_property(np, "clock-frequency", &size);
-		if (clk && size == sizeof(*clk) && *clk)
-			pltfm_host->clock = be32_to_cpup(clk);
+	clk = of_get_property(np, "clock-frequency", &size);
+	if (clk && size == sizeof(*clk) && *clk)
+		pltfm_host->clock = be32_to_cpup(clk);
 
-		if (of_find_property(np, "keep-power-in-suspend", NULL))
-			host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
+	if (of_find_property(np, "keep-power-in-suspend", NULL))
+		host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
 
-		if (of_find_property(np, "enable-sdio-wakeup", NULL))
-			host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
-	}
+	if (of_find_property(np, "enable-sdio-wakeup", NULL))
+		host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
 }
 #else
 void sdhci_get_of_property(struct platform_device *pdev) {}
@@ -225,9 +223,11 @@
 int sdhci_pltfm_unregister(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
 
 	sdhci_remove_host(host, dead);
+	clk_disable_unprepare(pltfm_host->clk);
 	sdhci_pltfm_free(pdev);
 
 	return 0;
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index f6f82ec..32848eb 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -20,17 +20,9 @@
 #define SIRF_TUNING_COUNT 128
 
 struct sdhci_sirf_priv {
-	struct clk *clk;
 	int gpio_cd;
 };
 
-static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
-	return clk_get_rate(priv->clk);
-}
-
 static void sdhci_sirf_set_bus_width(struct sdhci_host *host, int width)
 {
 	u8 ctrl;
@@ -56,7 +48,7 @@
 	int tuning_seq_cnt = 3;
 	u8 phase, tuned_phases[SIRF_TUNING_COUNT];
 	u8 tuned_phase_cnt = 0;
-	int rc, longest_range = 0;
+	int rc = 0, longest_range = 0;
 	int start = -1, end = 0, tuning_value = -1, range = 0;
 	u16 clock_setting;
 	struct mmc_host *mmc = host->mmc;
@@ -68,7 +60,7 @@
 	phase = 0;
 	do {
 		sdhci_writel(host,
-			clock_setting | phase | (phase << 7) | (phase << 16),
+			clock_setting | phase,
 			SDHCI_CLK_DELAY_SETTING);
 
 		if (!mmc_send_tuning(mmc)) {
@@ -102,7 +94,7 @@
 		 */
 		phase = tuning_value;
 		sdhci_writel(host,
-			clock_setting | phase | (phase << 7) | (phase << 16),
+			clock_setting | phase,
 			SDHCI_CLK_DELAY_SETTING);
 
 		dev_dbg(mmc_dev(mmc), "%s: Setting the tuning phase to %d\n",
@@ -122,7 +114,7 @@
 static struct sdhci_ops sdhci_sirf_ops = {
 	.platform_execute_tuning = sdhci_sirf_execute_tuning,
 	.set_clock = sdhci_set_clock,
-	.get_max_clock	= sdhci_sirf_get_max_clk,
+	.get_max_clock	= sdhci_pltfm_clk_get_max_clock,
 	.set_bus_width = sdhci_sirf_set_bus_width,
 	.reset = sdhci_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -162,13 +154,13 @@
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
+	pltfm_host->clk = clk;
 	priv = sdhci_pltfm_priv(pltfm_host);
-	priv->clk = clk;
 	priv->gpio_cd = gpio_cd;
 
 	sdhci_get_of_property(pdev);
 
-	ret = clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(pltfm_host->clk);
 	if (ret)
 		goto err_clk_prepare;
 
@@ -195,37 +187,24 @@
 err_request_cd:
 	sdhci_remove_host(host, 0);
 err_sdhci_add:
-	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(pltfm_host->clk);
 err_clk_prepare:
 	sdhci_pltfm_free(pdev);
 	return ret;
 }
 
-static int sdhci_sirf_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
-
-	sdhci_pltfm_unregister(pdev);
-
-	clk_disable_unprepare(priv->clk);
-	return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int sdhci_sirf_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = sdhci_suspend_host(host);
 	if (ret)
 		return ret;
 
-	clk_disable(priv->clk);
+	clk_disable(pltfm_host->clk);
 
 	return 0;
 }
@@ -234,10 +213,9 @@
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
-	ret = clk_enable(priv->clk);
+	ret = clk_enable(pltfm_host->clk);
 	if (ret) {
 		dev_dbg(dev, "Resume: Error enabling clock\n");
 		return ret;
@@ -264,7 +242,7 @@
 #endif
 	},
 	.probe		= sdhci_sirf_probe,
-	.remove		= sdhci_sirf_remove,
+	.remove		= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_sirf_driver);
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 22e5826..df08834 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -26,14 +26,13 @@
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sdhci-spear.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/io.h>
 #include "sdhci.h"
 
 struct spear_sdhci {
 	struct clk *clk;
-	struct sdhci_plat_data *data;
+	int card_int_gpio;
 };
 
 /* sdhci ops */
@@ -44,38 +43,20 @@
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
-#ifdef CONFIG_OF
-static struct sdhci_plat_data *sdhci_probe_config_dt(struct platform_device *pdev)
+static void sdhci_probe_config_dt(struct device_node *np,
+				struct spear_sdhci *host)
 {
-	struct device_node *np = pdev->dev.of_node;
-	struct sdhci_plat_data *pdata = NULL;
 	int cd_gpio;
 
 	cd_gpio = of_get_named_gpio(np, "cd-gpios", 0);
 	if (!gpio_is_valid(cd_gpio))
 		cd_gpio = -1;
 
-	/* If pdata is required */
-	if (cd_gpio != -1) {
-		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata)
-			dev_err(&pdev->dev, "DT: kzalloc failed\n");
-		else
-			pdata->card_int_gpio = cd_gpio;
-	}
-
-	return pdata;
+	host->card_int_gpio = cd_gpio;
 }
-#else
-static struct sdhci_plat_data *sdhci_probe_config_dt(struct platform_device *pdev)
-{
-	return ERR_PTR(-ENOSYS);
-}
-#endif
 
 static int sdhci_probe(struct platform_device *pdev)
 {
-	struct device_node *np = pdev->dev.of_node;
 	struct sdhci_host *host;
 	struct resource *iomem;
 	struct spear_sdhci *sdhci;
@@ -124,28 +105,18 @@
 		dev_dbg(&pdev->dev, "Error setting desired clk, clk=%lu\n",
 				clk_get_rate(sdhci->clk));
 
-	if (np) {
-		sdhci->data = sdhci_probe_config_dt(pdev);
-		if (IS_ERR(sdhci->data)) {
-			dev_err(&pdev->dev, "DT: Failed to get pdata\n");
-			goto disable_clk;
-		}
-	} else {
-		sdhci->data = dev_get_platdata(&pdev->dev);
-	}
-
+	sdhci_probe_config_dt(pdev->dev.of_node, sdhci);
 	/*
 	 * It is optional to use GPIOs for sdhci card detection. If
-	 * sdhci->data is NULL, then use original sdhci lines otherwise
+	 * sdhci->card_int_gpio < 0, then use original sdhci lines otherwise
 	 * GPIO lines. We use the built-in GPIO support for this.
 	 */
-	if (sdhci->data && sdhci->data->card_int_gpio >= 0) {
-		ret = mmc_gpio_request_cd(host->mmc,
-					  sdhci->data->card_int_gpio, 0);
+	if (sdhci->card_int_gpio >= 0) {
+		ret = mmc_gpio_request_cd(host->mmc, sdhci->card_int_gpio, 0);
 		if (ret < 0) {
 			dev_dbg(&pdev->dev,
 				"failed to request card-detect gpio%d\n",
-				sdhci->data->card_int_gpio);
+				sdhci->card_int_gpio);
 			goto disable_clk;
 		}
 	}
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index 882b07e..682f2bb 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -23,9 +23,295 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/mmc/host.h>
-
+#include <linux/reset.h>
 #include "sdhci-pltfm.h"
 
+struct st_mmc_platform_data {
+	struct  reset_control *rstc;
+	void __iomem *top_ioaddr;
+};
+
+/* MMCSS glue logic to setup the HC on some ST SoCs (e.g. STiH407 family) */
+
+#define ST_MMC_CCONFIG_REG_1		0x400
+#define ST_MMC_CCONFIG_TIMEOUT_CLK_UNIT	BIT(24)
+#define ST_MMC_CCONFIG_TIMEOUT_CLK_FREQ	BIT(12)
+#define ST_MMC_CCONFIG_TUNING_COUNT_DEFAULT	BIT(8)
+#define ST_MMC_CCONFIG_ASYNC_WAKEUP	BIT(0)
+#define ST_MMC_CCONFIG_1_DEFAULT	\
+				((ST_MMC_CCONFIG_TIMEOUT_CLK_UNIT) | \
+				 (ST_MMC_CCONFIG_TIMEOUT_CLK_FREQ) | \
+				 (ST_MMC_CCONFIG_TUNING_COUNT_DEFAULT))
+
+#define ST_MMC_CCONFIG_REG_2		0x404
+#define ST_MMC_CCONFIG_HIGH_SPEED	BIT(28)
+#define ST_MMC_CCONFIG_ADMA2		BIT(24)
+#define ST_MMC_CCONFIG_8BIT		BIT(20)
+#define ST_MMC_CCONFIG_MAX_BLK_LEN	16
+#define  MAX_BLK_LEN_1024		1
+#define  MAX_BLK_LEN_2048		2
+#define BASE_CLK_FREQ_200		0xc8
+#define BASE_CLK_FREQ_100		0x64
+#define BASE_CLK_FREQ_50		0x32
+#define ST_MMC_CCONFIG_2_DEFAULT \
+	(ST_MMC_CCONFIG_HIGH_SPEED | ST_MMC_CCONFIG_ADMA2 | \
+	 ST_MMC_CCONFIG_8BIT | \
+	 (MAX_BLK_LEN_1024 << ST_MMC_CCONFIG_MAX_BLK_LEN))
+
+#define ST_MMC_CCONFIG_REG_3			0x408
+#define ST_MMC_CCONFIG_EMMC_SLOT_TYPE		BIT(28)
+#define ST_MMC_CCONFIG_64BIT			BIT(24)
+#define ST_MMC_CCONFIG_ASYNCH_INTR_SUPPORT	BIT(20)
+#define ST_MMC_CCONFIG_1P8_VOLT			BIT(16)
+#define ST_MMC_CCONFIG_3P0_VOLT			BIT(12)
+#define ST_MMC_CCONFIG_3P3_VOLT			BIT(8)
+#define ST_MMC_CCONFIG_SUSP_RES_SUPPORT		BIT(4)
+#define ST_MMC_CCONFIG_SDMA			BIT(0)
+#define ST_MMC_CCONFIG_3_DEFAULT	\
+			 (ST_MMC_CCONFIG_ASYNCH_INTR_SUPPORT	| \
+			  ST_MMC_CCONFIG_3P3_VOLT		| \
+			  ST_MMC_CCONFIG_SUSP_RES_SUPPORT	| \
+			  ST_MMC_CCONFIG_SDMA)
+
+#define ST_MMC_CCONFIG_REG_4	0x40c
+#define ST_MMC_CCONFIG_D_DRIVER	BIT(20)
+#define ST_MMC_CCONFIG_C_DRIVER	BIT(16)
+#define ST_MMC_CCONFIG_A_DRIVER	BIT(12)
+#define ST_MMC_CCONFIG_DDR50	BIT(8)
+#define ST_MMC_CCONFIG_SDR104	BIT(4)
+#define ST_MMC_CCONFIG_SDR50	BIT(0)
+#define ST_MMC_CCONFIG_4_DEFAULT	0
+
+#define ST_MMC_CCONFIG_REG_5		0x410
+#define ST_MMC_CCONFIG_TUNING_FOR_SDR50	BIT(8)
+#define RETUNING_TIMER_CNT_MAX		0xf
+#define ST_MMC_CCONFIG_5_DEFAULT	0
+
+/* I/O configuration for Arasan IP */
+#define ST_MMC_GP_OUTPUT	0x450
+#define ST_MMC_GP_OUTPUT_CD	BIT(12)
+
+#define ST_MMC_STATUS_R		0x460
+
+#define ST_TOP_MMC_DLY_FIX_OFF(x)	(x - 0x8)
+
+/* TOP config registers to manage static and dynamic delay */
+#define ST_TOP_MMC_TX_CLK_DLY			ST_TOP_MMC_DLY_FIX_OFF(0x8)
+#define ST_TOP_MMC_RX_CLK_DLY			ST_TOP_MMC_DLY_FIX_OFF(0xc)
+/* MMC delay control register */
+#define ST_TOP_MMC_DLY_CTRL			ST_TOP_MMC_DLY_FIX_OFF(0x18)
+#define ST_TOP_MMC_DLY_CTRL_DLL_BYPASS_CMD	BIT(0)
+#define ST_TOP_MMC_DLY_CTRL_DLL_BYPASS_PH_SEL	BIT(1)
+#define ST_TOP_MMC_DLY_CTRL_TX_DLL_ENABLE	BIT(8)
+#define ST_TOP_MMC_DLY_CTRL_RX_DLL_ENABLE	BIT(9)
+#define ST_TOP_MMC_DLY_CTRL_ATUNE_NOT_CFG_DLY	BIT(10)
+#define ST_TOP_MMC_START_DLL_LOCK		BIT(11)
+
+/* register to provide the phase-shift value for DLL */
+#define ST_TOP_MMC_TX_DLL_STEP_DLY		ST_TOP_MMC_DLY_FIX_OFF(0x1c)
+#define ST_TOP_MMC_RX_DLL_STEP_DLY		ST_TOP_MMC_DLY_FIX_OFF(0x20)
+#define ST_TOP_MMC_RX_CMD_STEP_DLY		ST_TOP_MMC_DLY_FIX_OFF(0x24)
+
+/* phase shift delay on the tx clk 2.188ns */
+#define ST_TOP_MMC_TX_DLL_STEP_DLY_VALID	0x6
+
+#define ST_TOP_MMC_DLY_MAX			0xf
+
+#define ST_TOP_MMC_DYN_DLY_CONF	\
+		(ST_TOP_MMC_DLY_CTRL_TX_DLL_ENABLE | \
+		 ST_TOP_MMC_DLY_CTRL_ATUNE_NOT_CFG_DLY | \
+		 ST_TOP_MMC_START_DLL_LOCK)
+
+/*
+ * For clock speeds greater than 90MHz, we need to check that the
+ * DLL procedure has finished before switching to ultra-speed modes.
+ */
+#define	CLK_TO_CHECK_DLL_LOCK	90000000
+
+static inline void st_mmcss_set_static_delay(void __iomem *ioaddr)
+{
+	if (!ioaddr)
+		return;
+
+	writel_relaxed(0x0, ioaddr + ST_TOP_MMC_DLY_CTRL);
+	writel_relaxed(ST_TOP_MMC_DLY_MAX,
+			ioaddr + ST_TOP_MMC_TX_CLK_DLY);
+}
+
+/**
+ * st_mmcss_cconfig: configure the Arasan HC inside the flashSS.
+ * @np: dt device node.
+ * @host: sdhci host
+ * Description: this function is to configure the Arasan host controller.
+ * On some ST SoCs, i.e. STiH407 family, the MMC devices inside a dedicated
+ * flashSS sub-system which needs to be configured to be compliant to eMMC 4.5
+ * or eMMC4.3.  This has to be done before registering the sdhci host.
+ */
+static void st_mmcss_cconfig(struct device_node *np, struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct mmc_host *mhost = host->mmc;
+	u32 cconf2, cconf3, cconf4, cconf5;
+
+	if (!of_device_is_compatible(np, "st,sdhci-stih407"))
+		return;
+
+	cconf2 = ST_MMC_CCONFIG_2_DEFAULT;
+	cconf3 = ST_MMC_CCONFIG_3_DEFAULT;
+	cconf4 = ST_MMC_CCONFIG_4_DEFAULT;
+	cconf5 = ST_MMC_CCONFIG_5_DEFAULT;
+
+	writel_relaxed(ST_MMC_CCONFIG_1_DEFAULT,
+			host->ioaddr + ST_MMC_CCONFIG_REG_1);
+
+	/* Set clock frequency, default to 50MHz if max-frequency is not
+	 * provided */
+
+	switch (mhost->f_max) {
+	case 200000000:
+		clk_set_rate(pltfm_host->clk, mhost->f_max);
+		cconf2 |= BASE_CLK_FREQ_200;
+		break;
+	case 100000000:
+		clk_set_rate(pltfm_host->clk, mhost->f_max);
+		cconf2 |= BASE_CLK_FREQ_100;
+		break;
+	default:
+		clk_set_rate(pltfm_host->clk, 50000000);
+		cconf2 |= BASE_CLK_FREQ_50;
+	}
+
+	writel_relaxed(cconf2, host->ioaddr + ST_MMC_CCONFIG_REG_2);
+
+	if (mhost->caps & MMC_CAP_NONREMOVABLE)
+		cconf3 |= ST_MMC_CCONFIG_EMMC_SLOT_TYPE;
+	else
+		/* CARD _D ET_CTRL */
+		writel_relaxed(ST_MMC_GP_OUTPUT_CD,
+				host->ioaddr + ST_MMC_GP_OUTPUT);
+
+	if (mhost->caps & MMC_CAP_UHS_SDR50) {
+		/* use 1.8V */
+		cconf3 |= ST_MMC_CCONFIG_1P8_VOLT;
+		cconf4 |= ST_MMC_CCONFIG_SDR50;
+		/* Use tuning */
+		cconf5 |= ST_MMC_CCONFIG_TUNING_FOR_SDR50;
+		/* Max timeout for retuning */
+		cconf5 |= RETUNING_TIMER_CNT_MAX;
+	}
+
+	if (mhost->caps & MMC_CAP_UHS_SDR104) {
+		/*
+		 * SDR104 implies the HC can support HS200 mode, so
+		 * it's mandatory to use 1.8V
+		 */
+		cconf3 |= ST_MMC_CCONFIG_1P8_VOLT;
+		cconf4 |= ST_MMC_CCONFIG_SDR104;
+		/* Max timeout for retuning */
+		cconf5 |= RETUNING_TIMER_CNT_MAX;
+	}
+
+	if (mhost->caps & MMC_CAP_UHS_DDR50)
+		cconf4 |= ST_MMC_CCONFIG_DDR50;
+
+	writel_relaxed(cconf3, host->ioaddr + ST_MMC_CCONFIG_REG_3);
+	writel_relaxed(cconf4, host->ioaddr + ST_MMC_CCONFIG_REG_4);
+	writel_relaxed(cconf5, host->ioaddr + ST_MMC_CCONFIG_REG_5);
+}
+
+static inline void st_mmcss_set_dll(void __iomem *ioaddr)
+{
+	if (!ioaddr)
+		return;
+
+	writel_relaxed(ST_TOP_MMC_DYN_DLY_CONF,	ioaddr + ST_TOP_MMC_DLY_CTRL);
+	writel_relaxed(ST_TOP_MMC_TX_DLL_STEP_DLY_VALID,
+			ioaddr + ST_TOP_MMC_TX_DLL_STEP_DLY);
+}
+
+static int st_mmcss_lock_dll(void __iomem *ioaddr)
+{
+	unsigned long curr, value;
+	unsigned long finish = jiffies + HZ;
+
+	/* Checks if the DLL procedure is finished */
+	do {
+		curr = jiffies;
+		value = readl(ioaddr + ST_MMC_STATUS_R);
+		if (value & 0x1)
+			return 0;
+
+		cpu_relax();
+	} while (!time_after_eq(curr, finish));
+
+	return -EBUSY;
+}
+
+static int sdhci_st_set_dll_for_clock(struct sdhci_host *host)
+{
+	int ret = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+
+	if (host->clock > CLK_TO_CHECK_DLL_LOCK) {
+		st_mmcss_set_dll(pdata->top_ioaddr);
+		ret = st_mmcss_lock_dll(host->ioaddr);
+	}
+
+	return ret;
+}
+
+static void sdhci_st_set_uhs_signaling(struct sdhci_host *host,
+					unsigned int uhs)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	u16 ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+	int ret = 0;
+
+	/* Select Bus Speed Mode for host */
+	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+	switch (uhs) {
+	/*
+	 * Set V18_EN -- UHS modes do not work without this.
+	 * does not change signaling voltage
+	 */
+
+	case MMC_TIMING_UHS_SDR12:
+		st_mmcss_set_static_delay(pdata->top_ioaddr);
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR12 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_SDR25:
+		st_mmcss_set_static_delay(pdata->top_ioaddr);
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR25 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_SDR50:
+		st_mmcss_set_static_delay(pdata->top_ioaddr);
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
+		ret = sdhci_st_set_dll_for_clock(host);
+		break;
+	case MMC_TIMING_UHS_SDR104:
+	case MMC_TIMING_MMC_HS200:
+		st_mmcss_set_static_delay(pdata->top_ioaddr);
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
+		ret =  sdhci_st_set_dll_for_clock(host);
+		break;
+	case MMC_TIMING_UHS_DDR50:
+	case MMC_TIMING_MMC_DDR52:
+		st_mmcss_set_static_delay(pdata->top_ioaddr);
+		ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
+		break;
+	}
+
+	if (ret)
+		dev_warn(mmc_dev(host->mmc), "Error setting dll for clock "
+						"(uhs %d)\n", uhs);
+
+	dev_dbg(mmc_dev(host->mmc), "uhs %d, ctrl_2 %04X\n", uhs, ctrl_2);
+
+	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+
 static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
 {
 	u32 ret;
@@ -48,22 +334,33 @@
 	.set_bus_width = sdhci_set_bus_width,
 	.read_l = sdhci_st_readl,
 	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_st_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data sdhci_st_pdata = {
 	.ops = &sdhci_st_ops,
 	.quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
-	    SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+		SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+		SDHCI_QUIRK_NO_HISPD_BIT,
+	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+		SDHCI_QUIRK2_STOP_WITH_TC,
 };
 
 
 static int sdhci_st_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
 	struct sdhci_host *host;
+	struct st_mmc_platform_data *pdata;
 	struct sdhci_pltfm_host *pltfm_host;
 	struct clk *clk;
 	int ret = 0;
 	u16 host_version;
+	struct resource *res;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
 
 	clk =  devm_clk_get(&pdev->dev, "mmc");
 	if (IS_ERR(clk)) {
@@ -71,10 +368,17 @@
 		return PTR_ERR(clk);
 	}
 
+	pdata->rstc = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(pdata->rstc))
+		pdata->rstc = NULL;
+	else
+		reset_control_deassert(pdata->rstc);
+
 	host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
 	if (IS_ERR(host)) {
 		dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
-		return PTR_ERR(host);
+		ret = PTR_ERR(host);
+		goto err_pltfm_init;
 	}
 
 	ret = mmc_of_parse(host->mmc);
@@ -85,9 +389,22 @@
 
 	clk_prepare_enable(clk);
 
+	/* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "top-mmc-delay");
+	pdata->top_ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pdata->top_ioaddr)) {
+		dev_warn(&pdev->dev, "FlashSS Top Dly registers not available");
+		pdata->top_ioaddr = NULL;
+	}
+
 	pltfm_host = sdhci_priv(host);
+	pltfm_host->priv = pdata;
 	pltfm_host->clk = clk;
 
+	/* Configure the Arasan HC inside the flashSS */
+	st_mmcss_cconfig(np, host);
+
 	ret = sdhci_add_host(host);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed sdhci_add_host\n");
@@ -109,6 +426,9 @@
 	clk_disable_unprepare(clk);
 err_of:
 	sdhci_pltfm_free(pdev);
+err_pltfm_init:
+	if (pdata->rstc)
+		reset_control_assert(pdata->rstc);
 
 	return ret;
 }
@@ -117,10 +437,15 @@
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	int ret;
 
-	clk_disable_unprepare(pltfm_host->clk);
+	ret = sdhci_pltfm_unregister(pdev);
 
-	return sdhci_pltfm_unregister(pdev);
+	if (pdata->rstc)
+		reset_control_assert(pdata->rstc);
+
+	return ret;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -128,11 +453,15 @@
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct st_mmc_platform_data *pdata = pltfm_host->priv;
 	int ret = sdhci_suspend_host(host);
 
 	if (ret)
 		goto out;
 
+	if (pdata->rstc)
+		reset_control_assert(pdata->rstc);
+
 	clk_disable_unprepare(pltfm_host->clk);
 out:
 	return ret;
@@ -142,9 +471,16 @@
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct device_node *np = dev->of_node;
 
 	clk_prepare_enable(pltfm_host->clk);
 
+	if (pdata->rstc)
+		reset_control_deassert(pdata->rstc);
+
+	st_mmcss_cconfig(np, host);
+
 	return sdhci_resume_host(host);
 }
 #endif
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index f3778d5..ad28b49 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -20,11 +20,10 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
+#include <linux/gpio/consumer.h>
 
 #include "sdhci-pltfm.h"
 
@@ -41,7 +40,6 @@
 #define NVQUIRK_DISABLE_SDR50		BIT(3)
 #define NVQUIRK_DISABLE_SDR104		BIT(4)
 #define NVQUIRK_DISABLE_DDR50		BIT(5)
-#define NVQUIRK_SHADOW_XFER_MODE_REG	BIT(6)
 
 struct sdhci_tegra_soc_data {
 	const struct sdhci_pltfm_data *pdata;
@@ -50,7 +48,7 @@
 
 struct sdhci_tegra {
 	const struct sdhci_tegra_soc_data *soc_data;
-	int power_gpio;
+	struct gpio_desc *power_gpio;
 };
 
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -71,23 +69,19 @@
 static void tegra_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
 
-	if (soc_data->nvquirks & NVQUIRK_SHADOW_XFER_MODE_REG) {
-		switch (reg) {
-		case SDHCI_TRANSFER_MODE:
-			/*
-			 * Postpone this write, we must do it together with a
-			 * command write that is down below.
-			 */
-			pltfm_host->xfer_mode_shadow = val;
-			return;
-		case SDHCI_COMMAND:
-			writel((val << 16) | pltfm_host->xfer_mode_shadow,
-				host->ioaddr + SDHCI_TRANSFER_MODE);
-			return;
-		}
+	switch (reg) {
+	case SDHCI_TRANSFER_MODE:
+		/*
+		 * Postpone this write, we must do it together with a
+		 * command write that is down below.
+		 */
+		pltfm_host->xfer_mode_shadow = val;
+		return;
+	case SDHCI_COMMAND:
+		writel((val << 16) | pltfm_host->xfer_mode_shadow,
+			host->ioaddr + SDHCI_TRANSFER_MODE);
+		return;
 	}
 
 	writew(val, host->ioaddr + reg);
@@ -173,7 +167,6 @@
 static const struct sdhci_ops tegra_sdhci_ops = {
 	.get_ro     = tegra_sdhci_get_ro,
 	.read_w     = tegra_sdhci_readw,
-	.write_w    = tegra_sdhci_writew,
 	.write_l    = tegra_sdhci_writel,
 	.set_clock  = sdhci_set_clock,
 	.set_bus_width = tegra_sdhci_set_bus_width,
@@ -214,6 +207,18 @@
 		    NVQUIRK_DISABLE_SDR104,
 };
 
+static const struct sdhci_ops tegra114_sdhci_ops = {
+	.get_ro     = tegra_sdhci_get_ro,
+	.read_w     = tegra_sdhci_readw,
+	.write_w    = tegra_sdhci_writew,
+	.write_l    = tegra_sdhci_writel,
+	.set_clock  = sdhci_set_clock,
+	.set_bus_width = tegra_sdhci_set_bus_width,
+	.reset      = tegra_sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
+	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+};
+
 static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
 	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
@@ -221,15 +226,14 @@
 		  SDHCI_QUIRK_NO_HISPD_BIT |
 		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
 		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
-	.ops  = &tegra_sdhci_ops,
+	.ops  = &tegra114_sdhci_ops,
 };
 
 static struct sdhci_tegra_soc_data soc_data_tegra114 = {
 	.pdata = &sdhci_tegra114_pdata,
 	.nvquirks = NVQUIRK_DISABLE_SDR50 |
 		    NVQUIRK_DISABLE_DDR50 |
-		    NVQUIRK_DISABLE_SDR104 |
-		    NVQUIRK_SHADOW_XFER_MODE_REG,
+		    NVQUIRK_DISABLE_SDR104,
 };
 
 static const struct of_device_id sdhci_tegra_dt_match[] = {
@@ -241,17 +245,6 @@
 };
 MODULE_DEVICE_TABLE(of, sdhci_tegra_dt_match);
 
-static int sdhci_tegra_parse_dt(struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct sdhci_host *host = dev_get_drvdata(dev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-
-	tegra_host->power_gpio = of_get_named_gpio(np, "power-gpios", 0);
-	return mmc_of_parse(host->mmc);
-}
-
 static int sdhci_tegra_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -281,21 +274,18 @@
 	tegra_host->soc_data = soc_data;
 	pltfm_host->priv = tegra_host;
 
-	rc = sdhci_tegra_parse_dt(&pdev->dev);
+	rc = mmc_of_parse(host->mmc);
 	if (rc)
 		goto err_parse_dt;
 
-	if (gpio_is_valid(tegra_host->power_gpio)) {
-		rc = gpio_request(tegra_host->power_gpio, "sdhci_power");
-		if (rc) {
-			dev_err(mmc_dev(host->mmc),
-				"failed to allocate power gpio\n");
-			goto err_power_req;
-		}
-		gpio_direction_output(tegra_host->power_gpio, 1);
+	tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
+							 GPIOD_OUT_HIGH);
+	if (IS_ERR(tegra_host->power_gpio)) {
+		rc = PTR_ERR(tegra_host->power_gpio);
+		goto err_power_req;
 	}
 
-	clk = clk_get(mmc_dev(host->mmc), NULL);
+	clk = devm_clk_get(mmc_dev(host->mmc), NULL);
 	if (IS_ERR(clk)) {
 		dev_err(mmc_dev(host->mmc), "clk err\n");
 		rc = PTR_ERR(clk);
@@ -312,10 +302,7 @@
 
 err_add_host:
 	clk_disable_unprepare(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
 err_clk_get:
-	if (gpio_is_valid(tegra_host->power_gpio))
-		gpio_free(tegra_host->power_gpio);
 err_power_req:
 err_parse_dt:
 err_alloc_tegra_host:
@@ -323,26 +310,6 @@
 	return rc;
 }
 
-static int sdhci_tegra_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
-
-	if (gpio_is_valid(tegra_host->power_gpio))
-		gpio_free(tegra_host->power_gpio);
-
-	clk_disable_unprepare(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
-
-	sdhci_pltfm_free(pdev);
-
-	return 0;
-}
-
 static struct platform_driver sdhci_tegra_driver = {
 	.driver		= {
 		.name	= "sdhci-tegra",
@@ -350,7 +317,7 @@
 		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_tegra_probe,
-	.remove		= sdhci_tegra_remove,
+	.remove		= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_tegra_driver);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0ad412a..c80287a 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -28,6 +28,7 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/sdio.h>
 #include <linux/mmc/slot-gpio.h>
 
 #include "sdhci.h"
@@ -56,6 +57,7 @@
 static int sdhci_pre_dma_transfer(struct sdhci_host *host,
 					struct mmc_data *data,
 					struct sdhci_host_next *next);
+static int sdhci_do_get_cd(struct sdhci_host *host);
 
 #ifdef CONFIG_PM
 static int sdhci_runtime_pm_get(struct sdhci_host *host);
@@ -931,7 +933,8 @@
 		 * If we are sending CMD23, CMD12 never gets sent
 		 * on successful completion (so no Auto-CMD12).
 		 */
-		if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12))
+		if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12) &&
+		    (cmd->opcode != SD_IO_RW_EXTENDED))
 			mode |= SDHCI_TRNS_AUTO_CMD12;
 		else if (host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
 			mode |= SDHCI_TRNS_AUTO_CMD23;
@@ -1356,7 +1359,8 @@
 
 	sdhci_runtime_pm_get(host);
 
-	present = mmc_gpio_get_cd(host->mmc);
+	/* Firstly check card presence */
+	present = sdhci_do_get_cd(host);
 
 	spin_lock_irqsave(&host->lock, flags);
 
@@ -1379,22 +1383,6 @@
 
 	host->mrq = mrq;
 
-	/*
-	 * Firstly check card presence from cd-gpio.  The return could
-	 * be one of the following possibilities:
-	 *     negative: cd-gpio is not available
-	 *     zero: cd-gpio is used, and card is removed
-	 *     one: cd-gpio is used, and card is present
-	 */
-	if (present < 0) {
-		/* If polling, assume that the card is always present. */
-		if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
-			present = 1;
-		else
-			present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-					SDHCI_CARD_PRESENT;
-	}
-
 	if (!present || host->flags & SDHCI_DEVICE_DEAD) {
 		host->mrq->cmd->error = -ENOMEDIUM;
 		tasklet_schedule(&host->finish_tasklet);
@@ -3164,7 +3152,8 @@
 	/* Auto-CMD23 stuff only works in ADMA or PIO. */
 	if ((host->version >= SDHCI_SPEC_300) &&
 	    ((host->flags & SDHCI_USE_ADMA) ||
-	     !(host->flags & SDHCI_USE_SDMA))) {
+	     !(host->flags & SDHCI_USE_SDMA)) &&
+	     !(host->quirks2 & SDHCI_QUIRK2_ACMD23_BROKEN)) {
 		host->flags |= SDHCI_AUTO_CMD23;
 		DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc));
 	} else {
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0315e18..e639b7f 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -18,7 +18,7 @@
 #include <linux/types.h>
 #include <linux/io.h>
 
-#include <linux/mmc/sdhci.h>
+#include <linux/mmc/host.h>
 
 /*
  * Controller registers
@@ -309,6 +309,207 @@
  */
 #define SDHCI_MAX_SEGS		128
 
+struct sdhci_host_next {
+	unsigned int	sg_count;
+	s32		cookie;
+};
+
+struct sdhci_host {
+	/* Data set by hardware interface driver */
+	const char *hw_name;	/* Hardware bus name */
+
+	unsigned int quirks;	/* Deviations from spec. */
+
+/* Controller doesn't honor resets unless we touch the clock register */
+#define SDHCI_QUIRK_CLOCK_BEFORE_RESET			(1<<0)
+/* Controller has bad caps bits, but really supports DMA */
+#define SDHCI_QUIRK_FORCE_DMA				(1<<1)
+/* Controller doesn't like to be reset when there is no card inserted. */
+#define SDHCI_QUIRK_NO_CARD_NO_RESET			(1<<2)
+/* Controller doesn't like clearing the power reg before a change */
+#define SDHCI_QUIRK_SINGLE_POWER_WRITE			(1<<3)
+/* Controller has flaky internal state so reset it on each ios change */
+#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS		(1<<4)
+/* Controller has an unusable DMA engine */
+#define SDHCI_QUIRK_BROKEN_DMA				(1<<5)
+/* Controller has an unusable ADMA engine */
+#define SDHCI_QUIRK_BROKEN_ADMA				(1<<6)
+/* Controller can only DMA from 32-bit aligned addresses */
+#define SDHCI_QUIRK_32BIT_DMA_ADDR			(1<<7)
+/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_DMA_SIZE			(1<<8)
+/* Controller can only ADMA chunks that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_ADMA_SIZE			(1<<9)
+/* Controller needs to be reset after each request to stay stable */
+#define SDHCI_QUIRK_RESET_AFTER_REQUEST			(1<<10)
+/* Controller needs voltage and power writes to happen separately */
+#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER		(1<<11)
+/* Controller provides an incorrect timeout value for transfers */
+#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL			(1<<12)
+/* Controller has an issue with buffer bits for small transfers */
+#define SDHCI_QUIRK_BROKEN_SMALL_PIO			(1<<13)
+/* Controller does not provide transfer-complete interrupt when not busy */
+#define SDHCI_QUIRK_NO_BUSY_IRQ				(1<<14)
+/* Controller has unreliable card detection */
+#define SDHCI_QUIRK_BROKEN_CARD_DETECTION		(1<<15)
+/* Controller reports inverted write-protect state */
+#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT		(1<<16)
+/* Controller does not like fast PIO transfers */
+#define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
+/* Controller has to be forced to use block size of 2048 bytes */
+#define SDHCI_QUIRK_FORCE_BLK_SZ_2048			(1<<20)
+/* Controller cannot do multi-block transfers */
+#define SDHCI_QUIRK_NO_MULTIBLOCK			(1<<21)
+/* Controller can only handle 1-bit data transfers */
+#define SDHCI_QUIRK_FORCE_1_BIT_DATA			(1<<22)
+/* Controller needs 10ms delay between applying power and clock */
+#define SDHCI_QUIRK_DELAY_AFTER_POWER			(1<<23)
+/* Controller uses SDCLK instead of TMCLK for data timeouts */
+#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK		(1<<24)
+/* Controller reports wrong base clock capability */
+#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN		(1<<25)
+/* Controller cannot support End Attribute in NOP ADMA descriptor */
+#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC		(1<<26)
+/* Controller is missing device caps. Use caps provided by host */
+#define SDHCI_QUIRK_MISSING_CAPS			(1<<27)
+/* Controller uses Auto CMD12 command to stop the transfer */
+#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12		(1<<28)
+/* Controller doesn't have HISPD bit field in HI-SPEED SD card */
+#define SDHCI_QUIRK_NO_HISPD_BIT			(1<<29)
+/* Controller treats ADMA descriptors with length 0000h incorrectly */
+#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC		(1<<30)
+/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */
+#define SDHCI_QUIRK_UNSTABLE_RO_DETECT			(1<<31)
+
+	unsigned int quirks2;	/* More deviations from spec. */
+
+#define SDHCI_QUIRK2_HOST_OFF_CARD_ON			(1<<0)
+#define SDHCI_QUIRK2_HOST_NO_CMD23			(1<<1)
+/* The system physically doesn't support 1.8v, even if the host does */
+#define SDHCI_QUIRK2_NO_1_8_V				(1<<2)
+#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN		(1<<3)
+#define SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON		(1<<4)
+/* Controller has a non-standard host control register */
+#define SDHCI_QUIRK2_BROKEN_HOST_CONTROL		(1<<5)
+/* Controller does not support HS200 */
+#define SDHCI_QUIRK2_BROKEN_HS200			(1<<6)
+/* Controller does not support DDR50 */
+#define SDHCI_QUIRK2_BROKEN_DDR50			(1<<7)
+/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */
+#define SDHCI_QUIRK2_STOP_WITH_TC			(1<<8)
+/* Controller does not support 64-bit DMA */
+#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA			(1<<9)
+/* need clear transfer mode register before send cmd */
+#define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD	(1<<10)
+/* Capability register bit-63 indicates HS400 support */
+#define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400		(1<<11)
+/* forced tuned clock */
+#define SDHCI_QUIRK2_TUNING_WORK_AROUND			(1<<12)
+/* disable the block count for single block transactions */
+#define SDHCI_QUIRK2_SUPPORT_SINGLE			(1<<13)
+/* Controller broken with using ACMD23 */
+#define SDHCI_QUIRK2_ACMD23_BROKEN			(1<<14)
+
+	int irq;		/* Device IRQ */
+	void __iomem *ioaddr;	/* Mapped address */
+
+	const struct sdhci_ops *ops;	/* Low level hw interface */
+
+	/* Internal data */
+	struct mmc_host *mmc;	/* MMC structure */
+	u64 dma_mask;		/* custom DMA mask */
+
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+	struct led_classdev led;	/* LED control */
+	char led_name[32];
+#endif
+
+	spinlock_t lock;	/* Mutex */
+
+	int flags;		/* Host attributes */
+#define SDHCI_USE_SDMA		(1<<0)	/* Host is SDMA capable */
+#define SDHCI_USE_ADMA		(1<<1)	/* Host is ADMA capable */
+#define SDHCI_REQ_USE_DMA	(1<<2)	/* Use DMA for this req. */
+#define SDHCI_DEVICE_DEAD	(1<<3)	/* Device unresponsive */
+#define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
+#define SDHCI_NEEDS_RETUNING	(1<<5)	/* Host needs retuning */
+#define SDHCI_AUTO_CMD12	(1<<6)	/* Auto CMD12 support */
+#define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
+#define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
+#define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
+#define SDHCI_SDR104_NEEDS_TUNING (1<<10)	/* SDR104/HS200 needs tuning */
+#define SDHCI_USING_RETUNING_TIMER (1<<11)	/* Host is using a retuning timer for the card */
+#define SDHCI_USE_64_BIT_DMA	(1<<12)	/* Use 64-bit DMA */
+#define SDHCI_HS400_TUNING	(1<<13)	/* Tuning for HS400 */
+
+	unsigned int version;	/* SDHCI spec. version */
+
+	unsigned int max_clk;	/* Max possible freq (MHz) */
+	unsigned int timeout_clk;	/* Timeout freq (KHz) */
+	unsigned int clk_mul;	/* Clock Muliplier value */
+
+	unsigned int clock;	/* Current clock (MHz) */
+	u8 pwr;			/* Current voltage */
+
+	bool runtime_suspended;	/* Host is runtime suspended */
+	bool bus_on;		/* Bus power prevents runtime suspend */
+	bool preset_enabled;	/* Preset is enabled */
+
+	struct mmc_request *mrq;	/* Current request */
+	struct mmc_command *cmd;	/* Current command */
+	struct mmc_data *data;	/* Current data request */
+	unsigned int data_early:1;	/* Data finished before cmd */
+	unsigned int busy_handle:1;	/* Handling the order of Busy-end */
+
+	struct sg_mapping_iter sg_miter;	/* SG state for PIO */
+	unsigned int blocks;	/* remaining PIO blocks */
+
+	int sg_count;		/* Mapped sg entries */
+
+	void *adma_table;	/* ADMA descriptor table */
+	void *align_buffer;	/* Bounce buffer */
+
+	size_t adma_table_sz;	/* ADMA descriptor table size */
+	size_t align_buffer_sz;	/* Bounce buffer size */
+
+	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
+	dma_addr_t align_addr;	/* Mapped bounce buffer */
+
+	unsigned int desc_sz;	/* ADMA descriptor size */
+	unsigned int align_sz;	/* ADMA alignment */
+	unsigned int align_mask;	/* ADMA alignment mask */
+
+	struct tasklet_struct finish_tasklet;	/* Tasklet structures */
+
+	struct timer_list timer;	/* Timer for timeouts */
+
+	u32 caps;		/* Alternative CAPABILITY_0 */
+	u32 caps1;		/* Alternative CAPABILITY_1 */
+
+	unsigned int            ocr_avail_sdio;	/* OCR bit masks */
+	unsigned int            ocr_avail_sd;
+	unsigned int            ocr_avail_mmc;
+	u32 ocr_mask;		/* available voltages */
+
+	unsigned		timing;		/* Current timing */
+
+	u32			thread_isr;
+
+	/* cached registers */
+	u32			ier;
+
+	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
+	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
+
+	unsigned int		tuning_count;	/* Timer count for re-tuning */
+	unsigned int		tuning_mode;	/* Re-tuning mode supported by host */
+#define SDHCI_TUNING_MODE_1	0
+	struct timer_list	tuning_timer;	/* Timer for tuning */
+
+	struct sdhci_host_next	next_data;
+	unsigned long private[0] ____cacheline_aligned;
+};
+
 struct sdhci_ops {
 #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
 	u32		(*read_l)(struct sdhci_host *host, int reg);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 7d9d6a3..072f670 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -875,6 +875,7 @@
 	struct mmc_command *cmd = mrq->cmd;
 	u32 opc = cmd->opcode;
 	u32 mask;
+	unsigned long flags;
 
 	switch (opc) {
 	/* response busy check */
@@ -909,10 +910,12 @@
 	/* set arg */
 	sh_mmcif_writel(host->addr, MMCIF_CE_ARG, cmd->arg);
 	/* set cmd */
+	spin_lock_irqsave(&host->lock, flags);
 	sh_mmcif_writel(host->addr, MMCIF_CE_CMD_SET, opc);
 
 	host->wait_for = MMCIF_WAIT_FOR_CMD;
 	schedule_delayed_work(&host->timeout_work, host->timeout);
+	spin_unlock_irqrestore(&host->lock, flags);
 }
 
 static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host,
@@ -1171,6 +1174,12 @@
 	struct sh_mmcif_host *host = dev_id;
 	struct mmc_request *mrq;
 	bool wait = false;
+	unsigned long flags;
+	int wait_work;
+
+	spin_lock_irqsave(&host->lock, flags);
+	wait_work = host->wait_for;
+	spin_unlock_irqrestore(&host->lock, flags);
 
 	cancel_delayed_work_sync(&host->timeout_work);
 
@@ -1188,7 +1197,7 @@
 	 * All handlers return true, if processing continues, and false, if the
 	 * request has to be completed - successfully or not
 	 */
-	switch (host->wait_for) {
+	switch (wait_work) {
 	case MMCIF_WAIT_FOR_REQUEST:
 		/* We're too late, the timeout has already kicked in */
 		mutex_unlock(&host->thread_lock);
@@ -1312,15 +1321,15 @@
 		/* Don't run after mmc_remove_host() */
 		return;
 
-	dev_err(&host->pd->dev, "Timeout waiting for %u on CMD%u\n",
-		host->wait_for, mrq->cmd->opcode);
-
 	spin_lock_irqsave(&host->lock, flags);
 	if (host->state == STATE_IDLE) {
 		spin_unlock_irqrestore(&host->lock, flags);
 		return;
 	}
 
+	dev_err(&host->pd->dev, "Timeout waiting for %u on CMD%u\n",
+		host->wait_for, mrq->cmd->opcode);
+
 	host->state = STATE_TIMEOUT;
 	spin_unlock_irqrestore(&host->lock, flags);
 
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index e8a4218..4d3e1ff 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -293,7 +293,7 @@
 				    struct mmc_data *data)
 {
 	struct sunxi_idma_des *pdes = (struct sunxi_idma_des *)host->sg_cpu;
-	struct sunxi_idma_des *pdes_pa = (struct sunxi_idma_des *)host->sg_dma;
+	dma_addr_t next_desc = host->sg_dma;
 	int i, max_len = (1 << host->idma_des_size_bits);
 
 	for (i = 0; i < data->sg_len; i++) {
@@ -305,8 +305,9 @@
 		else
 			pdes[i].buf_size = data->sg[i].length;
 
+		next_desc += sizeof(struct sunxi_idma_des);
 		pdes[i].buf_addr_ptr1 = sg_dma_address(&data->sg[i]);
-		pdes[i].buf_addr_ptr2 = (u32)&pdes_pa[i + 1];
+		pdes[i].buf_addr_ptr2 = (u32)next_desc;
 	}
 
 	pdes[0].config |= SDXC_IDMAC_DES0_FD;
@@ -930,7 +931,9 @@
 		return PTR_ERR(host->clk_sample);
 	}
 
-	host->reset = devm_reset_control_get(&pdev->dev, "ahb");
+	host->reset = devm_reset_control_get_optional(&pdev->dev, "ahb");
+	if (PTR_ERR(host->reset) == -EPROBE_DEFER)
+		return PTR_ERR(host->reset);
 
 	ret = clk_prepare_enable(host->clk_ahb);
 	if (ret) {
@@ -1028,7 +1031,7 @@
 	mmc->f_min		=   400000;
 	mmc->f_max		= 50000000;
 	mmc->caps	       |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
-				  MMC_CAP_ERASE;
+				  MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ;
 
 	ret = mmc_of_parse(mmc);
 	if (ret)
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index a31c357..dba7e1c 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -1073,8 +1073,6 @@
 void tmio_mmc_host_free(struct tmio_mmc_host *host)
 {
 	mmc_free_host(host->mmc);
-
-	host->mmc = NULL;
 }
 EXPORT_SYMBOL(tmio_mmc_host_free);
 
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index dd2e1aa..5af0055 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -744,7 +744,7 @@
 	.max_blk_size = 2048,
 };
 
-static struct of_device_id wmt_mci_dt_ids[] = {
+static const struct of_device_id wmt_mci_dt_ids[] = {
 	{ .compatible = "wm,wm8505-sdhc", .data = &wm8505_caps },
 	{ /* Sentinel */ },
 };
diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index 289ad3a..8dcc7b8 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c
@@ -758,8 +758,7 @@
 
 	hisi_nfc_host_init(host);
 
-	ret = devm_request_irq(dev, irq, hinfc_irq_handle, IRQF_DISABLED,
-				"nandc", host);
+	ret = devm_request_irq(dev, irq, hinfc_irq_handle, 0x0, "nandc", host);
 	if (ret) {
 		dev_err(dev, "failed to request IRQ\n");
 		goto err_res;
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 46d6c75c..20c1332 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -2,6 +2,9 @@
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -66,6 +69,87 @@
 	return device_add(&ofdev->dev);
 }
 
+/**
+ * of_dma_configure - Setup DMA configuration
+ * @dev:	Device to apply DMA configuration
+ * @np:		Pointer to OF node having DMA configuration
+ *
+ * Try to get devices's DMA configuration from DT and update it
+ * accordingly.
+ *
+ * If platform code needs to use its own special DMA configuration, it
+ * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
+ * to fix up DMA configuration.
+ */
+void of_dma_configure(struct device *dev, struct device_node *np)
+{
+	u64 dma_addr, paddr, size;
+	int ret;
+	bool coherent;
+	unsigned long offset;
+	struct iommu_ops *iommu;
+
+	/*
+	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
+	 * setup the correct supported mask.
+	 */
+	if (!dev->coherent_dma_mask)
+		dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	/*
+	 * Set it to coherent_dma_mask by default if the architecture
+	 * code has not set it.
+	 */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+
+	ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
+	if (ret < 0) {
+		dma_addr = offset = 0;
+		size = dev->coherent_dma_mask + 1;
+	} else {
+		offset = PFN_DOWN(paddr - dma_addr);
+
+		/*
+		 * Add a work around to treat the size as mask + 1 in case
+		 * it is defined in DT as a mask.
+		 */
+		if (size & 1) {
+			dev_warn(dev, "Invalid size 0x%llx for dma-range\n",
+				 size);
+			size = size + 1;
+		}
+
+		if (!size) {
+			dev_err(dev, "Adjusted size 0x%llx invalid\n", size);
+			return;
+		}
+		dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset);
+	}
+
+	dev->dma_pfn_offset = offset;
+
+	/*
+	 * Limit coherent and dma mask based on size and default mask
+	 * set by the driver.
+	 */
+	dev->coherent_dma_mask = min(dev->coherent_dma_mask,
+				     DMA_BIT_MASK(ilog2(dma_addr + size)));
+	*dev->dma_mask = min((*dev->dma_mask),
+			     DMA_BIT_MASK(ilog2(dma_addr + size)));
+
+	coherent = of_dma_is_coherent(np);
+	dev_dbg(dev, "device is%sdma coherent\n",
+		coherent ? " " : " not ");
+
+	iommu = of_iommu_configure(dev, np);
+	dev_dbg(dev, "device is%sbehind an iommu\n",
+		iommu ? " " : " not ");
+
+	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
+}
+EXPORT_SYMBOL_GPL(of_dma_configure);
+
 int of_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 62426d8..5751dc5 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/of_pci.h>
 #include <linux/slab.h>
 
@@ -116,6 +117,26 @@
 }
 EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
 
+/**
+ * of_pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to pci_dev struct of the PCI device
+ *
+ * Function to update PCI devices's DMA configuration using the same
+ * info from the OF node of host bridge's parent (if any).
+ */
+void of_pci_dma_configure(struct pci_dev *pci_dev)
+{
+	struct device *dev = &pci_dev->dev;
+	struct device *bridge = pci_get_host_bridge_device(pci_dev);
+
+	if (!bridge->parent)
+		return;
+
+	of_dma_configure(dev, bridge->parent->of_node);
+	pci_put_host_bridge_device(bridge);
+}
+EXPORT_SYMBOL_GPL(of_pci_dma_configure);
+
 #if defined(CONFIG_OF_ADDRESS)
 /**
  * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index b189733..a01f57c 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
@@ -150,59 +149,6 @@
 }
 EXPORT_SYMBOL(of_device_alloc);
 
-/**
- * of_dma_configure - Setup DMA configuration
- * @dev:	Device to apply DMA configuration
- *
- * Try to get devices's DMA configuration from DT and update it
- * accordingly.
- *
- * In case if platform code need to use own special DMA configuration,it
- * can use Platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE event
- * to fix up DMA configuration.
- */
-static void of_dma_configure(struct device *dev)
-{
-	u64 dma_addr, paddr, size;
-	int ret;
-	bool coherent;
-	unsigned long offset;
-	struct iommu_ops *iommu;
-
-	/*
-	 * Set default dma-mask to 32 bit. Drivers are expected to setup
-	 * the correct supported dma_mask.
-	 */
-	dev->coherent_dma_mask = DMA_BIT_MASK(32);
-
-	/*
-	 * Set it to coherent_dma_mask by default if the architecture
-	 * code has not set it.
-	 */
-	if (!dev->dma_mask)
-		dev->dma_mask = &dev->coherent_dma_mask;
-
-	ret = of_dma_get_range(dev->of_node, &dma_addr, &paddr, &size);
-	if (ret < 0) {
-		dma_addr = offset = 0;
-		size = dev->coherent_dma_mask;
-	} else {
-		offset = PFN_DOWN(paddr - dma_addr);
-		dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset);
-	}
-	dev->dma_pfn_offset = offset;
-
-	coherent = of_dma_is_coherent(dev->of_node);
-	dev_dbg(dev, "device is%sdma coherent\n",
-		coherent ? " " : " not ");
-
-	iommu = of_iommu_configure(dev);
-	dev_dbg(dev, "device is%sbehind an iommu\n",
-		iommu ? " " : " not ");
-
-	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
-}
-
 static void of_dma_deconfigure(struct device *dev)
 {
 	arch_teardown_dma_ops(dev);
@@ -236,7 +182,7 @@
 
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
-	of_dma_configure(&dev->dev);
+	of_dma_configure(&dev->dev, dev->dev.of_node);
 
 	if (of_device_add(dev) != 0) {
 		of_dma_deconfigure(&dev->dev);
@@ -299,7 +245,7 @@
 		dev_set_name(&dev->dev, "%s", bus_id);
 	else
 		of_device_make_bus_id(&dev->dev);
-	of_dma_configure(&dev->dev);
+	of_dma_configure(&dev->dev, dev->dev.of_node);
 
 	/* Allow the HW Peripheral ID to be overridden */
 	prop = of_get_property(node, "arm,primecell-periphid", NULL);
diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 39b2dbe..5f4a2e0 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -16,13 +16,27 @@
 	return bus;
 }
 
-static struct pci_host_bridge *find_pci_host_bridge(struct pci_bus *bus)
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus)
 {
 	struct pci_bus *root_bus = find_pci_root_bus(bus);
 
 	return to_pci_host_bridge(root_bus->bridge);
 }
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev)
+{
+	struct pci_bus *root_bus = find_pci_root_bus(dev->bus);
+	struct device *bridge = root_bus->bridge;
+
+	kobject_get(&bridge->kobj);
+	return bridge;
+}
+
+void  pci_put_host_bridge_device(struct device *dev)
+{
+	kobject_put(&dev->kobj);
+}
+
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 				 void (*release_fn)(struct pci_host_bridge *),
 				 void *release_data)
@@ -34,7 +48,7 @@
 void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
 			     struct resource *res)
 {
-	struct pci_host_bridge *bridge = find_pci_host_bridge(bus);
+	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
 	struct resource_entry *window;
 	resource_size_t offset = 0;
 
@@ -59,7 +73,7 @@
 void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
 			     struct pci_bus_region *region)
 {
-	struct pci_host_bridge *bridge = find_pci_host_bridge(bus);
+	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
 	struct resource_entry *window;
 	resource_size_t offset = 0;
 
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 7b892a9..1dfb567 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -106,4 +106,23 @@
 	bool "ARM Versatile PB PCI controller"
 	depends on ARCH_VERSATILE
 
+config PCIE_IPROC
+	tristate "Broadcom iProc PCIe controller"
+	depends on OF && ARM
+	default n
+	help
+	  This enables the iProc PCIe core controller support for Broadcom's
+	  iProc family of SoCs. An appropriate bus interface driver also needs
+	  to be enabled
+
+config PCIE_IPROC_PLATFORM
+	tristate "Broadcom iProc PCIe platform bus driver"
+	depends on ARCH_BCM_IPROC || (ARM && COMPILE_TEST)
+	depends on OF
+	select PCIE_IPROC
+	default ARCH_BCM_IPROC
+	help
+	  Say Y here if you want to use the Broadcom iProc PCIe controller
+	  through the generic platform bus interface
+
 endmenu
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index e61d91c..f733b4e 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -13,3 +13,5 @@
 obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
 obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
 obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
+obj-$(CONFIG_PCIE_IPROC) += pcie-iproc.o
+obj-$(CONFIG_PCIE_IPROC_PLATFORM) += pcie-iproc-platform.o
diff --git a/drivers/pci/host/pci-exynos.c b/drivers/pci/host/pci-exynos.c
index d202b37..c139237 100644
--- a/drivers/pci/host/pci-exynos.c
+++ b/drivers/pci/host/pci-exynos.c
@@ -396,7 +396,7 @@
 
 	/* enable INTX interrupt */
 	val = IRQ_INTA_ASSERT | IRQ_INTB_ASSERT |
-		IRQ_INTC_ASSERT | IRQ_INTD_ASSERT,
+		IRQ_INTC_ASSERT | IRQ_INTD_ASSERT;
 	exynos_elb_writel(exynos_pcie, val, PCIE_IRQ_EN_PULSE);
 }
 
diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index 66d8ea4..f34892e 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c
@@ -496,11 +496,12 @@
 
 	/* Index 1 is the application reg. space address */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	ks_pcie->app = *res;
 	ks_pcie->va_app_base = devm_ioremap_resource(pp->dev, res);
 	if (IS_ERR(ks_pcie->va_app_base))
 		return PTR_ERR(ks_pcie->va_app_base);
 
+	ks_pcie->app = *res;
+
 	/* Create legacy IRQ domain */
 	ks_pcie->legacy_irq_domain =
 			irq_domain_add_linear(ks_pcie->legacy_intc_np,
diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c
index 68c9e5e..4a6e62f 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -127,14 +127,11 @@
 	pcie->dev = &pdev->dev;
 
 	dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-	if (!dbi_base) {
-		dev_err(&pdev->dev, "missing *regs* space\n");
-		return -ENODEV;
-	}
-
 	pcie->dbi = devm_ioremap_resource(&pdev->dev, dbi_base);
-	if (IS_ERR(pcie->dbi))
+	if (IS_ERR(pcie->dbi)) {
+		dev_err(&pdev->dev, "missing *regs* space\n");
 		return PTR_ERR(pcie->dbi);
+	}
 
 	pcie->scfg = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 						     "fsl,pcie-scfg");
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 1309cfb..1ab8635 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -129,6 +129,7 @@
 	size_t memwin_size;
 	phys_addr_t iowin_base;
 	size_t iowin_size;
+	u32 saved_pcie_stat;
 };
 
 static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
@@ -899,6 +900,35 @@
 		pcie->msi->dev = &pcie->pdev->dev;
 }
 
+static int mvebu_pcie_suspend(struct device *dev)
+{
+	struct mvebu_pcie *pcie;
+	int i;
+
+	pcie = dev_get_drvdata(dev);
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = pcie->ports + i;
+		port->saved_pcie_stat = mvebu_readl(port, PCIE_STAT_OFF);
+	}
+
+	return 0;
+}
+
+static int mvebu_pcie_resume(struct device *dev)
+{
+	struct mvebu_pcie *pcie;
+	int i;
+
+	pcie = dev_get_drvdata(dev);
+	for (i = 0; i < pcie->nports; i++) {
+		struct mvebu_pcie_port *port = pcie->ports + i;
+		mvebu_writel(port, port->saved_pcie_stat, PCIE_STAT_OFF);
+		mvebu_pcie_setup_hw(port);
+	}
+
+	return 0;
+}
+
 static int mvebu_pcie_probe(struct platform_device *pdev)
 {
 	struct mvebu_pcie *pcie;
@@ -1056,6 +1086,8 @@
 	mvebu_pcie_msi_enable(pcie);
 	mvebu_pcie_enable(pcie);
 
+	platform_set_drvdata(pdev, pcie);
+
 	return 0;
 }
 
@@ -1068,12 +1100,18 @@
 };
 MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
 
+static struct dev_pm_ops mvebu_pcie_pm_ops = {
+	.suspend_noirq = mvebu_pcie_suspend,
+	.resume_noirq = mvebu_pcie_resume,
+};
+
 static struct platform_driver mvebu_pcie_driver = {
 	.driver = {
 		.name = "mvebu-pcie",
 		.of_match_table = mvebu_pcie_of_match_table,
 		/* driver unloading/unbinding currently not supported */
 		.suppress_bind_attrs = true,
+		.pm = &mvebu_pcie_pm_ops,
 	},
 	.probe = mvebu_pcie_probe,
 };
diff --git a/drivers/pci/host/pci-rcar-gen2.c b/drivers/pci/host/pci-rcar-gen2.c
index dd6b84e..367e28f 100644
--- a/drivers/pci/host/pci-rcar-gen2.c
+++ b/drivers/pci/host/pci-rcar-gen2.c
@@ -301,6 +301,9 @@
 	if (!mem_res || !mem_res->start)
 		return -ENODEV;
 
+	if (mem_res->start & 0xFFFF)
+		return -EINVAL;
+
 	priv = devm_kzalloc(&pdev->dev,
 			    sizeof(struct rcar_pci_priv), GFP_KERNEL);
 	if (!priv)
diff --git a/drivers/pci/host/pci-versatile.c b/drivers/pci/host/pci-versatile.c
index 464bf49..0863d9c 100644
--- a/drivers/pci/host/pci-versatile.c
+++ b/drivers/pci/host/pci-versatile.c
@@ -138,19 +138,19 @@
 	LIST_HEAD(pci_res);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
 	versatile_pci_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(versatile_pci_base))
+		return PTR_ERR(versatile_pci_base);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!res)
-		return -ENODEV;
 	versatile_cfg_base[0] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(versatile_cfg_base[0]))
+		return PTR_ERR(versatile_cfg_base[0]);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	if (!res)
-		return -ENODEV;
 	versatile_cfg_base[1] = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(versatile_cfg_base[1]))
+		return PTR_ERR(versatile_cfg_base[1]);
 
 	ret = versatile_pci_parse_request_of_pci_ranges(&pdev->dev, &pci_res);
 	if (ret)
@@ -214,6 +214,7 @@
 
 	pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
 	pci_assign_unassigned_bus_resources(bus);
+	pci_bus_add_devices(bus);
 
 	return 0;
 }
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 1f4ea6f..2e9f84f 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -342,7 +342,7 @@
 	.map = dw_pcie_msi_map,
 };
 
-int __init dw_pcie_host_init(struct pcie_port *pp)
+int dw_pcie_host_init(struct pcie_port *pp)
 {
 	struct device_node *np = pp->dev->of_node;
 	struct platform_device *pdev = to_platform_device(pp->dev);
diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
new file mode 100644
index 0000000..afad6c2
--- /dev/null
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2015 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#include "pcie-iproc.h"
+
+static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie;
+	struct device_node *np = pdev->dev.of_node;
+	struct resource reg;
+	resource_size_t iobase = 0;
+	LIST_HEAD(res);
+	int ret;
+
+	pcie = devm_kzalloc(&pdev->dev, sizeof(struct iproc_pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->dev = &pdev->dev;
+	platform_set_drvdata(pdev, pcie);
+
+	ret = of_address_to_resource(np, 0, &reg);
+	if (ret < 0) {
+		dev_err(pcie->dev, "unable to obtain controller resources\n");
+		return ret;
+	}
+
+	pcie->base = devm_ioremap(pcie->dev, reg.start, resource_size(&reg));
+	if (!pcie->base) {
+		dev_err(pcie->dev, "unable to map controller registers\n");
+		return -ENOMEM;
+	}
+
+	/* PHY use is optional */
+	pcie->phy = devm_phy_get(&pdev->dev, "pcie-phy");
+	if (IS_ERR(pcie->phy)) {
+		if (PTR_ERR(pcie->phy) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		pcie->phy = NULL;
+	}
+
+	ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
+	if (ret) {
+		dev_err(pcie->dev,
+			"unable to get PCI host bridge resources\n");
+		return ret;
+	}
+
+	pcie->resources = &res;
+
+	ret = iproc_pcie_setup(pcie);
+	if (ret) {
+		dev_err(pcie->dev, "PCIe controller setup failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
+
+	return iproc_pcie_remove(pcie);
+}
+
+static const struct of_device_id iproc_pcie_of_match_table[] = {
+	{ .compatible = "brcm,iproc-pcie", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, iproc_pcie_of_match_table);
+
+static struct platform_driver iproc_pcie_pltfm_driver = {
+	.driver = {
+		.name = "iproc-pcie",
+		.of_match_table = of_match_ptr(iproc_pcie_of_match_table),
+	},
+	.probe = iproc_pcie_pltfm_probe,
+	.remove = iproc_pcie_pltfm_remove,
+};
+module_platform_driver(iproc_pcie_pltfm_driver);
+
+MODULE_AUTHOR("Ray Jui <rjui@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom iPROC PCIe platform driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
new file mode 100644
index 0000000..329e1b5
--- /dev/null
+++ b/drivers/pci/host/pcie-iproc.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2014 Hauke Mehrtens <hauke@hauke-m.de>
+ * Copyright (C) 2015 Broadcom Corporatcommon ion
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/mbus.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+
+#include "pcie-iproc.h"
+
+#define CLK_CONTROL_OFFSET           0x000
+#define EP_MODE_SURVIVE_PERST_SHIFT  1
+#define EP_MODE_SURVIVE_PERST        BIT(EP_MODE_SURVIVE_PERST_SHIFT)
+#define RC_PCIE_RST_OUTPUT_SHIFT     0
+#define RC_PCIE_RST_OUTPUT           BIT(RC_PCIE_RST_OUTPUT_SHIFT)
+
+#define CFG_IND_ADDR_OFFSET          0x120
+#define CFG_IND_ADDR_MASK            0x00001ffc
+
+#define CFG_IND_DATA_OFFSET          0x124
+
+#define CFG_ADDR_OFFSET              0x1f8
+#define CFG_ADDR_BUS_NUM_SHIFT       20
+#define CFG_ADDR_BUS_NUM_MASK        0x0ff00000
+#define CFG_ADDR_DEV_NUM_SHIFT       15
+#define CFG_ADDR_DEV_NUM_MASK        0x000f8000
+#define CFG_ADDR_FUNC_NUM_SHIFT      12
+#define CFG_ADDR_FUNC_NUM_MASK       0x00007000
+#define CFG_ADDR_REG_NUM_SHIFT       2
+#define CFG_ADDR_REG_NUM_MASK        0x00000ffc
+#define CFG_ADDR_CFG_TYPE_SHIFT      0
+#define CFG_ADDR_CFG_TYPE_MASK       0x00000003
+
+#define CFG_DATA_OFFSET              0x1fc
+
+#define SYS_RC_INTX_EN               0x330
+#define SYS_RC_INTX_MASK             0xf
+
+static inline struct iproc_pcie *sys_to_pcie(struct pci_sys_data *sys)
+{
+	return sys->private_data;
+}
+
+/**
+ * Note access to the configuration registers are protected at the higher layer
+ * by 'pci_lock' in drivers/pci/access.c
+ */
+static void __iomem *iproc_pcie_map_cfg_bus(struct pci_bus *bus,
+					    unsigned int devfn,
+					    int where)
+{
+	struct pci_sys_data *sys = bus->sysdata;
+	struct iproc_pcie *pcie = sys_to_pcie(sys);
+	unsigned slot = PCI_SLOT(devfn);
+	unsigned fn = PCI_FUNC(devfn);
+	unsigned busno = bus->number;
+	u32 val;
+
+	/* root complex access */
+	if (busno == 0) {
+		if (slot >= 1)
+			return NULL;
+		writel(where & CFG_IND_ADDR_MASK,
+		       pcie->base + CFG_IND_ADDR_OFFSET);
+		return (pcie->base + CFG_IND_DATA_OFFSET);
+	}
+
+	if (fn > 1)
+		return NULL;
+
+	/* EP device access */
+	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
+		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
+		(fn << CFG_ADDR_FUNC_NUM_SHIFT) |
+		(where & CFG_ADDR_REG_NUM_MASK) |
+		(1 & CFG_ADDR_CFG_TYPE_MASK);
+	writel(val, pcie->base + CFG_ADDR_OFFSET);
+
+	return (pcie->base + CFG_DATA_OFFSET);
+}
+
+static struct pci_ops iproc_pcie_ops = {
+	.map_bus = iproc_pcie_map_cfg_bus,
+	.read = pci_generic_config_read32,
+	.write = pci_generic_config_write32,
+};
+
+static void iproc_pcie_reset(struct iproc_pcie *pcie)
+{
+	u32 val;
+
+	/*
+	 * Configure the PCIe controller as root complex and send a downstream
+	 * reset
+	 */
+	val = EP_MODE_SURVIVE_PERST | RC_PCIE_RST_OUTPUT;
+	writel(val, pcie->base + CLK_CONTROL_OFFSET);
+	udelay(250);
+	val &= ~EP_MODE_SURVIVE_PERST;
+	writel(val, pcie->base + CLK_CONTROL_OFFSET);
+	msleep(250);
+}
+
+static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus)
+{
+	u8 hdr_type;
+	u32 link_ctrl;
+	u16 pos, link_status;
+	int link_is_active = 0;
+
+	/* make sure we are not in EP mode */
+	pci_bus_read_config_byte(bus, 0, PCI_HEADER_TYPE, &hdr_type);
+	if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) {
+		dev_err(pcie->dev, "in EP mode, hdr=%#02x\n", hdr_type);
+		return -EFAULT;
+	}
+
+	/* force class to PCI_CLASS_BRIDGE_PCI (0x0604) */
+	pci_bus_write_config_word(bus, 0, PCI_CLASS_DEVICE,
+				  PCI_CLASS_BRIDGE_PCI);
+
+	/* check link status to see if link is active */
+	pos = pci_bus_find_capability(bus, 0, PCI_CAP_ID_EXP);
+	pci_bus_read_config_word(bus, 0, pos + PCI_EXP_LNKSTA, &link_status);
+	if (link_status & PCI_EXP_LNKSTA_NLW)
+		link_is_active = 1;
+
+	if (!link_is_active) {
+		/* try GEN 1 link speed */
+#define PCI_LINK_STATUS_CTRL_2_OFFSET 0x0dc
+#define PCI_TARGET_LINK_SPEED_MASK    0xf
+#define PCI_TARGET_LINK_SPEED_GEN2    0x2
+#define PCI_TARGET_LINK_SPEED_GEN1    0x1
+		pci_bus_read_config_dword(bus, 0,
+					  PCI_LINK_STATUS_CTRL_2_OFFSET,
+					  &link_ctrl);
+		if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) ==
+		    PCI_TARGET_LINK_SPEED_GEN2) {
+			link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK;
+			link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1;
+			pci_bus_write_config_dword(bus, 0,
+					   PCI_LINK_STATUS_CTRL_2_OFFSET,
+					   link_ctrl);
+			msleep(100);
+
+			pos = pci_bus_find_capability(bus, 0, PCI_CAP_ID_EXP);
+			pci_bus_read_config_word(bus, 0, pos + PCI_EXP_LNKSTA,
+						 &link_status);
+			if (link_status & PCI_EXP_LNKSTA_NLW)
+				link_is_active = 1;
+		}
+	}
+
+	dev_info(pcie->dev, "link: %s\n", link_is_active ? "UP" : "DOWN");
+
+	return link_is_active ? 0 : -ENODEV;
+}
+
+static void iproc_pcie_enable(struct iproc_pcie *pcie)
+{
+	writel(SYS_RC_INTX_MASK, pcie->base + SYS_RC_INTX_EN);
+}
+
+int iproc_pcie_setup(struct iproc_pcie *pcie)
+{
+	int ret;
+	struct pci_bus *bus;
+
+	if (!pcie || !pcie->dev || !pcie->base)
+		return -EINVAL;
+
+	if (pcie->phy) {
+		ret = phy_init(pcie->phy);
+		if (ret) {
+			dev_err(pcie->dev, "unable to initialize PCIe PHY\n");
+			return ret;
+		}
+
+		ret = phy_power_on(pcie->phy);
+		if (ret) {
+			dev_err(pcie->dev, "unable to power on PCIe PHY\n");
+			goto err_exit_phy;
+		}
+
+	}
+
+	iproc_pcie_reset(pcie);
+
+	pcie->sysdata.private_data = pcie;
+
+	bus = pci_create_root_bus(pcie->dev, 0, &iproc_pcie_ops,
+				  &pcie->sysdata, pcie->resources);
+	if (!bus) {
+		dev_err(pcie->dev, "unable to create PCI root bus\n");
+		ret = -ENOMEM;
+		goto err_power_off_phy;
+	}
+	pcie->root_bus = bus;
+
+	ret = iproc_pcie_check_link(pcie, bus);
+	if (ret) {
+		dev_err(pcie->dev, "no PCIe EP device detected\n");
+		goto err_rm_root_bus;
+	}
+
+	iproc_pcie_enable(pcie);
+
+	pci_scan_child_bus(bus);
+	pci_assign_unassigned_bus_resources(bus);
+	pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
+	pci_bus_add_devices(bus);
+
+	return 0;
+
+err_rm_root_bus:
+	pci_stop_root_bus(bus);
+	pci_remove_root_bus(bus);
+
+err_power_off_phy:
+	if (pcie->phy)
+		phy_power_off(pcie->phy);
+err_exit_phy:
+	if (pcie->phy)
+		phy_exit(pcie->phy);
+
+	return ret;
+}
+EXPORT_SYMBOL(iproc_pcie_setup);
+
+int iproc_pcie_remove(struct iproc_pcie *pcie)
+{
+	pci_stop_root_bus(pcie->root_bus);
+	pci_remove_root_bus(pcie->root_bus);
+
+	if (pcie->phy) {
+		phy_power_off(pcie->phy);
+		phy_exit(pcie->phy);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(iproc_pcie_remove);
+
+MODULE_AUTHOR("Ray Jui <rjui@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom iPROC PCIe common driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
new file mode 100644
index 0000000..e28075e
--- /dev/null
+++ b/drivers/pci/host/pcie-iproc.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014-2015 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _PCIE_IPROC_H
+#define _PCIE_IPROC_H
+
+#define IPROC_PCIE_MAX_NUM_IRQS 6
+
+/**
+ * iProc PCIe device
+ * @dev: pointer to device data structure
+ * @base: PCIe host controller I/O register base
+ * @resources: linked list of all PCI resources
+ * @sysdata: Per PCI controller data
+ * @root_bus: pointer to root bus
+ * @phy: optional PHY device that controls the Serdes
+ * @irqs: interrupt IDs
+ */
+struct iproc_pcie {
+	struct device *dev;
+	void __iomem *base;
+	struct list_head *resources;
+	struct pci_sys_data sysdata;
+	struct pci_bus *root_bus;
+	struct phy *phy;
+	int irqs[IPROC_PCIE_MAX_NUM_IRQS];
+};
+
+int iproc_pcie_setup(struct iproc_pcie *pcie);
+int iproc_pcie_remove(struct iproc_pcie *pcie);
+
+#endif /* _PCIE_IPROC_H */
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index c57bd0a..c086210 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -64,8 +64,8 @@
 #define  LAR_ENABLE		(1 << 1)
 
 /* PCIe address reg & mask */
-#define PCIEPARL(x)		(0x03400 + ((x) * 0x20))
-#define PCIEPARH(x)		(0x03404 + ((x) * 0x20))
+#define PCIEPALR(x)		(0x03400 + ((x) * 0x20))
+#define PCIEPAUR(x)		(0x03404 + ((x) * 0x20))
 #define PCIEPAMR(x)		(0x03408 + ((x) * 0x20))
 #define PCIEPTCTLR(x)		(0x0340c + ((x) * 0x20))
 #define  PAR_ENABLE		(1 << 31)
@@ -341,8 +341,9 @@
 	else
 		res_start = res->start;
 
-	rcar_pci_write_reg(pcie, upper_32_bits(res_start), PCIEPARH(win));
-	rcar_pci_write_reg(pcie, lower_32_bits(res_start), PCIEPARL(win));
+	rcar_pci_write_reg(pcie, upper_32_bits(res_start), PCIEPAUR(win));
+	rcar_pci_write_reg(pcie, lower_32_bits(res_start) & ~0x7F,
+			   PCIEPALR(win));
 
 	/* First resource is for IO */
 	mask = PAR_ENABLE;
@@ -501,7 +502,7 @@
 
 	/* Enable MSI */
 	if (IS_ENABLED(CONFIG_PCI_MSI))
-		rcar_pci_write_reg(pcie, 0x101f0000, PCIEMSITXR);
+		rcar_pci_write_reg(pcie, 0x801f0000, PCIEMSITXR);
 
 	/* Finish initialization - establish a PCI Express link */
 	rcar_pci_write_reg(pcie, CFINIT, PCIETCTLR);
diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c
index 866465f..020d788 100644
--- a/drivers/pci/host/pcie-spear13xx.c
+++ b/drivers/pci/host/pcie-spear13xx.c
@@ -269,7 +269,7 @@
 	.host_init = spear13xx_pcie_host_init,
 };
 
-static int __init spear13xx_add_pcie_port(struct pcie_port *pp,
+static int spear13xx_add_pcie_port(struct pcie_port *pp,
 					 struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -299,7 +299,7 @@
 	return 0;
 }
 
-static int __init spear13xx_pcie_probe(struct platform_device *pdev)
+static int spear13xx_pcie_probe(struct platform_device *pdev)
 {
 	struct spear13xx_pcie *spear13xx_pcie;
 	struct pcie_port *pp;
@@ -370,7 +370,7 @@
 };
 MODULE_DEVICE_TABLE(of, spear13xx_pcie_of_match);
 
-static struct platform_driver spear13xx_pcie_driver __initdata = {
+static struct platform_driver spear13xx_pcie_driver = {
 	.probe		= spear13xx_pcie_probe,
 	.driver = {
 		.name	= "spear-pcie",
diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
index 7d48eca..788db48 100644
--- a/drivers/pci/hotplug/cpci_hotplug_pci.c
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c
@@ -286,11 +286,12 @@
 	}
 	parent = slot->dev->bus;
 
-	list_for_each_entry(dev, &parent->devices, bus_list)
+	list_for_each_entry(dev, &parent->devices, bus_list) {
 		if (PCI_SLOT(dev->devfn) != PCI_SLOT(slot->devfn))
 			continue;
 		if (pci_is_bridge(dev))
 			pci_hp_add_bridge(dev);
+	}
 
 
 	pci_assign_unassigned_bridge_resources(parent->self);
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 96c5c72..1530247 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -738,7 +738,7 @@
  */
 static u8 bus_structure_fixup(u8 busno)
 {
-	struct pci_bus *bus;
+	struct pci_bus *bus, *b;
 	struct pci_dev *dev;
 	u16 l;
 
@@ -765,7 +765,11 @@
 					(l != 0x0000) && (l != 0xffff)) {
 			debug("%s - Inside bus_structure_fixup()\n",
 							__func__);
-			pci_scan_bus(busno, ibmphp_pci_bus->ops, NULL);
+			b = pci_scan_bus(busno, ibmphp_pci_bus->ops, NULL);
+			if (!b)
+				continue;
+
+			pci_bus_add_devices(b);
 			break;
 		}
 	}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4890639..6f6f175 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -18,6 +18,15 @@
 #include <linux/pm_qos.h>
 #include "pci.h"
 
+/*
+ * The UUID is defined in the PCI Firmware Specification available here:
+ * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
+ */
+const u8 pci_acpi_dsm_uuid[] = {
+	0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
+	0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
+};
+
 phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
 {
 	acpi_status status = AE_NOT_EXIST;
@@ -248,6 +257,9 @@
 	acpi_handle handle, phandle;
 	struct pci_bus *pbus;
 
+	if (acpi_pci_disabled)
+		return -ENODEV;
+
 	handle = NULL;
 	for (pbus = dev->bus; pbus; pbus = pbus->parent) {
 		handle = acpi_pci_get_bridge_handle(pbus);
@@ -528,11 +540,32 @@
 
 void acpi_pci_add_bus(struct pci_bus *bus)
 {
+	union acpi_object *obj;
+	struct pci_host_bridge *bridge;
+
 	if (acpi_pci_disabled || !bus->bridge)
 		return;
 
 	acpi_pci_slot_enumerate(bus);
 	acpiphp_enumerate_slots(bus);
+
+	/*
+	 * For a host bridge, check its _DSM for function 8 and if
+	 * that is available, mark it in pci_host_bridge.
+	 */
+	if (!pci_is_root_bus(bus))
+		return;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+				RESET_DELAY_DSM, NULL);
+	if (!obj)
+		return;
+
+	if (obj->type == ACPI_TYPE_INTEGER && obj->integer.value == 1) {
+		bridge = pci_find_host_bridge(bus);
+		bridge->ignore_reset_delay = 1;
+	}
+	ACPI_FREE(obj);
 }
 
 void acpi_pci_remove_bus(struct pci_bus *bus)
@@ -558,6 +591,57 @@
 				      check_children);
 }
 
+/**
+ * pci_acpi_optimize_delay - optimize PCI D3 and D3cold delay from ACPI
+ * @pdev: the PCI device whose delay is to be updated
+ * @adev: the companion ACPI device of this PCI device
+ *
+ * Update the d3_delay and d3cold_delay of a PCI device from the ACPI _DSM
+ * control method of either the device itself or the PCI host bridge.
+ *
+ * Function 8, "Reset Delay," applies to the entire hierarchy below a PCI
+ * host bridge.  If it returns one, the OS may assume that all devices in
+ * the hierarchy have already completed power-on reset delays.
+ *
+ * Function 9, "Device Readiness Durations," applies only to the object
+ * where it is located.  It returns delay durations required after various
+ * events if the device requires less time than the spec requires.  Delays
+ * from this function take precedence over the Reset Delay function.
+ *
+ * These _DSM functions are defined by the draft ECN of January 28, 2014,
+ * titled "ACPI additions for FW latency optimizations."
+ */
+static void pci_acpi_optimize_delay(struct pci_dev *pdev,
+				    acpi_handle handle)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+	int value;
+	union acpi_object *obj, *elements;
+
+	if (bridge->ignore_reset_delay)
+		pdev->d3cold_delay = 0;
+
+	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+				FUNCTION_DELAY_DSM, NULL);
+	if (!obj)
+		return;
+
+	if (obj->type == ACPI_TYPE_PACKAGE && obj->package.count == 5) {
+		elements = obj->package.elements;
+		if (elements[0].type == ACPI_TYPE_INTEGER) {
+			value = (int)elements[0].integer.value / 1000;
+			if (value < PCI_PM_D3COLD_WAIT)
+				pdev->d3cold_delay = value;
+		}
+		if (elements[3].type == ACPI_TYPE_INTEGER) {
+			value = (int)elements[3].integer.value / 1000;
+			if (value < PCI_PM_D3_WAIT)
+				pdev->d3_delay = value;
+		}
+	}
+	ACPI_FREE(obj);
+}
+
 static void pci_acpi_setup(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -566,6 +650,8 @@
 	if (!adev)
 		return;
 
+	pci_acpi_optimize_delay(pci_dev, adev->handle);
+
 	pci_acpi_add_pm_notifier(adev, pci_dev);
 	if (!adev->wakeup.flags.valid)
 		return;
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 2ab1b47..024b5c1 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -31,8 +31,6 @@
 #include <linux/pci-acpi.h>
 #include "pci.h"
 
-#define	DEVICE_LABEL_DSM	0x07
-
 #ifdef CONFIG_DMI
 enum smbios_attr_enum {
 	SMBIOS_ATTR_NONE = 0,
@@ -148,11 +146,6 @@
 #endif
 
 #ifdef CONFIG_ACPI
-static const char device_label_dsm_uuid[] = {
-	0xD0, 0x37, 0xC9, 0xE5, 0x53, 0x35, 0x7A, 0x4D,
-	0x91, 0x17, 0xEA, 0x4D, 0x19, 0xC3, 0x43, 0x4D
-};
-
 enum acpi_attr_enum {
 	ACPI_ATTR_LABEL_SHOW,
 	ACPI_ATTR_INDEX_SHOW,
@@ -179,7 +172,7 @@
 	if (!handle)
 		return -1;
 
-	obj = acpi_evaluate_dsm(handle, device_label_dsm_uuid, 0x2,
+	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
 				DEVICE_LABEL_DSM, NULL);
 	if (!obj)
 		return -1;
@@ -219,7 +212,7 @@
 	if (!handle)
 		return false;
 
-	return !!acpi_check_dsm(handle, device_label_dsm_uuid, 0x2,
+	return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
 				1 << DEVICE_LABEL_DSM);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 81f06e8..acc4b6e 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -126,15 +126,16 @@
 #ifdef CONFIG_HAS_IOMEM
 void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
+	struct resource *res = &pdev->resource[bar];
+
 	/*
 	 * Make sure the BAR is actually a memory resource, not an IO resource
 	 */
-	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
-		WARN_ON(1);
+	if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) {
+		dev_warn(&pdev->dev, "can't ioremap BAR %d: %pR\n", bar, res);
 		return NULL;
 	}
-	return ioremap_nocache(pci_resource_start(pdev, bar),
-				     pci_resource_len(pdev, bar));
+	return ioremap_nocache(res->start, resource_size(res));
 }
 EXPORT_SYMBOL_GPL(pci_ioremap_bar);
 #endif
@@ -145,19 +146,22 @@
 				   u8 pos, int cap, int *ttl)
 {
 	u8 id;
+	u16 ent;
+
+	pci_bus_read_config_byte(bus, devfn, pos, &pos);
 
 	while ((*ttl)--) {
-		pci_bus_read_config_byte(bus, devfn, pos, &pos);
 		if (pos < 0x40)
 			break;
 		pos &= ~3;
-		pci_bus_read_config_byte(bus, devfn, pos + PCI_CAP_LIST_ID,
-					 &id);
+		pci_bus_read_config_word(bus, devfn, pos, &ent);
+
+		id = ent & 0xff;
 		if (id == 0xff)
 			break;
 		if (id == cap)
 			return pos;
-		pos += PCI_CAP_LIST_NEXT;
+		pos = (ent >> 8);
 	}
 	return 0;
 }
@@ -2492,6 +2496,7 @@
 	*pinp = pin;
 	return PCI_SLOT(dev->devfn);
 }
+EXPORT_SYMBOL_GPL(pci_common_swizzle);
 
 /**
  *	pci_release_region - Release a PCI bar
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4091f82..d72f849 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -321,4 +321,6 @@
 }
 #endif
 
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index c6849d9..167fe41 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -132,16 +132,8 @@
 static void __print_tlp_header(struct pci_dev *dev,
 			       struct aer_header_log_regs *t)
 {
-	unsigned char *tlp = (unsigned char *)&t;
-
-	dev_err(&dev->dev, "  TLP Header:"
-		" %02x%02x%02x%02x %02x%02x%02x%02x"
-		" %02x%02x%02x%02x %02x%02x%02x%02x\n",
-		*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
-		*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
-		*(tlp + 11), *(tlp + 10), *(tlp + 9),
-		*(tlp + 8), *(tlp + 15), *(tlp + 14),
-		*(tlp + 13), *(tlp + 12));
+	dev_err(&dev->dev, "  TLP Header: %08x %08x %08x %08x\n",
+		t->dw0, t->dw1, t->dw2, t->dw3);
 }
 
 static void __aer_print_error(struct pci_dev *dev,
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 820740a..7d4fcdc 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -782,24 +782,6 @@
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
-void pcie_clear_aspm(struct pci_bus *bus)
-{
-	struct pci_dev *child;
-
-	if (aspm_force)
-		return;
-
-	/*
-	 * Clear any ASPM setup that the firmware has carried out on this bus
-	 */
-	list_for_each_entry(child, &bus->devices, bus_list) {
-		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
-					 PCIE_LINK_STATE_L1 |
-					 PCIE_LINK_STATE_CLKPM,
-					 false, true);
-	}
-}
-
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
 	int i;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8d2f400..6675a7a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -6,6 +6,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/of_pci.h>
 #include <linux/pci_hotplug.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -1520,6 +1521,7 @@
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
+	of_pci_dma_configure(dev);
 
 	pci_set_dma_max_seg_size(dev, 65536);
 	pci_set_dma_seg_boundary(dev, 0xffffffff);
@@ -1993,6 +1995,7 @@
 	kfree(b);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(pci_create_root_bus);
 
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
 {
@@ -2087,7 +2090,6 @@
 	if (!found)
 		pci_bus_update_busn_res_end(b, max);
 
-	pci_bus_add_devices(b);
 	return b;
 }
 EXPORT_SYMBOL(pci_scan_root_bus);
@@ -2123,7 +2125,6 @@
 	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
 	if (b) {
 		pci_scan_child_bus(b);
-		pci_bus_add_devices(b);
 	} else {
 		pci_free_resource_list(&resources);
 	}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 85f247e..c6dc1df 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3182,7 +3182,7 @@
 			|| nhi->subsystem_vendor != 0x2222
 			|| nhi->subsystem_device != 0x1111)
 		goto out;
-	dev_info(&dev->dev, "quirk: wating for thunderbolt to reestablish pci tunnels...\n");
+	dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n");
 	device_pm_wait_for_dev(&dev->dev, &nhi->dev);
 out:
 	pci_dev_put(nhi);
@@ -3822,6 +3822,38 @@
 	{ PCI_VENDOR_ID_INTEL, 0x154F, pci_quirk_mf_endpoint_acs },
 	{ PCI_VENDOR_ID_INTEL, 0x1551, pci_quirk_mf_endpoint_acs },
 	{ PCI_VENDOR_ID_INTEL, 0x1558, pci_quirk_mf_endpoint_acs },
+	/* 82580 */
+	{ PCI_VENDOR_ID_INTEL, 0x1509, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150E, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150F, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1510, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1511, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1516, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1527, pci_quirk_mf_endpoint_acs },
+	/* 82576 */
+	{ PCI_VENDOR_ID_INTEL, 0x10C9, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E6, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10E8, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150A, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x150D, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1518, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1526, pci_quirk_mf_endpoint_acs },
+	/* 82575 */
+	{ PCI_VENDOR_ID_INTEL, 0x10A7, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10A9, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10D6, pci_quirk_mf_endpoint_acs },
+	/* I350 */
+	{ PCI_VENDOR_ID_INTEL, 0x1521, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1522, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1523, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1524, pci_quirk_mf_endpoint_acs },
+	/* 82571 (Quads omitted due to non-ACS switch) */
+	{ PCI_VENDOR_ID_INTEL, 0x105E, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x105F, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x1060, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_INTEL, 0x10D9, pci_quirk_mf_endpoint_acs },
+	/* Intel PCH root ports */
 	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_pch_acs },
 	{ 0x19a2, 0x710, pci_quirk_mf_endpoint_acs }, /* Emulex BE3-R */
 	{ 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 8bd76c9..8a280e9 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -139,6 +139,7 @@
 	/* stop the host bridge */
 	device_release_driver(&host_bridge->dev);
 }
+EXPORT_SYMBOL_GPL(pci_stop_root_bus);
 
 void pci_remove_root_bus(struct pci_bus *bus)
 {
@@ -158,3 +159,4 @@
 	/* remove the host bridge */
 	device_unregister(&host_bridge->dev);
 }
+EXPORT_SYMBOL_GPL(pci_remove_root_bus);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index e3e17f3..8169597 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1750,3 +1750,4 @@
 	__pci_bus_assign_resources(bus, &add_list, NULL);
 	BUG_ON(!list_empty(&add_list));
 }
+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bus_resources);
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index 4e2d595..95c225b 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -65,3 +65,4 @@
 	for_each_pci_dev(dev)
 		pdev_fixup_irq(dev, swizzle, map_irq);
 }
+EXPORT_SYMBOL_GPL(pci_fixup_irqs);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index b7c3a5e..232f925 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -120,6 +120,7 @@
 	if (!root) {
 		dev_info(&dev->dev, "can't claim BAR %d %pR: no compatible bridge window\n",
 			 resource, res);
+		res->flags |= IORESOURCE_UNSET;
 		return -EINVAL;
 	}
 
@@ -127,6 +128,7 @@
 	if (conflict) {
 		dev_info(&dev->dev, "can't claim BAR %d %pR: address conflict with %s %pR\n",
 			 resource, res, conflict->name, conflict);
+		res->flags |= IORESOURCE_UNSET;
 		return -EBUSY;
 	}
 
diff --git a/drivers/phy/phy-qcom-ufs.c b/drivers/phy/phy-qcom-ufs.c
index c4199e6..f9c618f 100644
--- a/drivers/phy/phy-qcom-ufs.c
+++ b/drivers/phy/phy-qcom-ufs.c
@@ -335,10 +335,10 @@
 			goto out;
 		}
 		uA_load = on ? vreg->max_uA : 0;
-		ret = regulator_set_optimum_mode(reg, uA_load);
+		ret = regulator_set_load(reg, uA_load);
 		if (ret >= 0) {
 			/*
-			 * regulator_set_optimum_mode() returns new regulator
+			 * regulator_set_load() returns new regulator
 			 * mode upon success.
 			 */
 			ret = 0;
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 15c0fab..b4e9447 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -177,7 +177,7 @@
 	unsigned char curr_pwm;
 
 	/* Power supply */
-	struct power_supply psy;
+	struct power_supply *psy;
 	struct power_supply_info psy_info;
 	char bat_model_name[BAT_MODEL_NAME_LEN + 1];
 	char bat_manufacturer_name[BAT_MANUFACTURER_NAME_LEN + 1];
@@ -565,8 +565,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct compal_data *data;
-	data = container_of(psy, struct compal_data, psy);
+	struct compal_data *data = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -875,13 +874,16 @@
 };
 MODULE_DEVICE_TABLE(dmi, compal_dmi_table);
 
+static const struct power_supply_desc psy_bat_desc = {
+	.name		= DRIVER_NAME,
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= compal_bat_properties,
+	.num_properties	= ARRAY_SIZE(compal_bat_properties),
+	.get_property	= bat_get_property,
+};
+
 static void initialize_power_supply_data(struct compal_data *data)
 {
-	data->psy.name = DRIVER_NAME;
-	data->psy.type = POWER_SUPPLY_TYPE_BATTERY;
-	data->psy.properties = compal_bat_properties;
-	data->psy.num_properties = ARRAY_SIZE(compal_bat_properties);
-	data->psy.get_property = bat_get_property;
 
 	ec_read_sequence(BAT_MANUFACTURER_NAME_ADDR,
 					data->bat_manufacturer_name,
@@ -1011,6 +1013,7 @@
 	int err;
 	struct compal_data *data;
 	struct device *hwmon_dev;
+	struct power_supply_config psy_cfg = {};
 
 	if (!extra_features)
 		return 0;
@@ -1026,9 +1029,9 @@
 	if (err)
 		return err;
 
-	hwmon_dev = hwmon_device_register_with_groups(&pdev->dev,
-						      "compal", data,
-						      compal_hwmon_groups);
+	hwmon_dev = devm_hwmon_device_register_with_groups(&pdev->dev,
+							   "compal", data,
+							   compal_hwmon_groups);
 	if (IS_ERR(hwmon_dev)) {
 		err = PTR_ERR(hwmon_dev);
 		goto remove;
@@ -1036,7 +1039,13 @@
 
 	/* Power supply */
 	initialize_power_supply_data(data);
-	power_supply_register(&compal_device->dev, &data->psy);
+	psy_cfg.drv_data = data;
+	data->psy = power_supply_register(&compal_device->dev, &psy_bat_desc,
+					  &psy_cfg);
+	if (IS_ERR(data->psy)) {
+		err = PTR_ERR(data->psy);
+		goto remove;
+	}
 
 	platform_set_drvdata(pdev, data);
 
@@ -1071,7 +1080,7 @@
 	pwm_disable_control();
 
 	data = platform_get_drvdata(pdev);
-	power_supply_unregister(&data->psy);
+	power_supply_unregister(data->psy);
 
 	sysfs_remove_group(&pdev->dev.kobj, &compal_platform_attr_group);
 
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index ebf0d67..943c1cb 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -246,13 +246,16 @@
 	 */
 	for_each_pci_dev(pdev) {
 		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-			unsigned long type;
+			unsigned long flags, type;
 
-			type = pci_resource_flags(pdev, i) &
-					(IORESOURCE_IO | IORESOURCE_MEM);
+			flags = pci_resource_flags(pdev, i);
+			type = flags & (IORESOURCE_IO | IORESOURCE_MEM);
 			if (!type || pci_resource_len(pdev, i) == 0)
 				continue;
 
+			if (flags & IORESOURCE_UNSET)
+				continue;
+
 			pci_start = pci_resource_start(pdev, i);
 			pci_end = pci_resource_end(pdev, i);
 			for (j = 0;
diff --git a/drivers/power/88pm860x_battery.c b/drivers/power/88pm860x_battery.c
index bd3c997..d49579b 100644
--- a/drivers/power/88pm860x_battery.c
+++ b/drivers/power/88pm860x_battery.c
@@ -98,7 +98,7 @@
 	struct i2c_client *i2c;
 	struct device *dev;
 
-	struct power_supply battery;
+	struct power_supply *battery;
 	struct mutex lock;
 	int status;
 	int irq_cc;
@@ -798,9 +798,8 @@
 
 static void pm860x_external_power_changed(struct power_supply *psy)
 {
-	struct pm860x_battery_info *info;
+	struct pm860x_battery_info *info = dev_get_drvdata(psy->dev.parent);
 
-	info = container_of(psy, struct pm860x_battery_info, battery);
 	calc_resistor(info);
 }
 
@@ -808,7 +807,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct pm860x_battery_info *info = dev_get_drvdata(psy->dev->parent);
+	struct pm860x_battery_info *info = dev_get_drvdata(psy->dev.parent);
 	int data;
 	int ret;
 
@@ -874,7 +873,7 @@
 				       enum power_supply_property psp,
 				       const union power_supply_propval *val)
 {
-	struct pm860x_battery_info *info = dev_get_drvdata(psy->dev->parent);
+	struct pm860x_battery_info *info = dev_get_drvdata(psy->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
@@ -901,6 +900,16 @@
 	POWER_SUPPLY_PROP_TEMP,
 };
 
+static const struct power_supply_desc pm860x_battery_desc = {
+	.name			= "battery-monitor",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= pm860x_batt_props,
+	.num_properties		= ARRAY_SIZE(pm860x_batt_props),
+	.get_property		= pm860x_batt_get_prop,
+	.set_property		= pm860x_batt_set_prop,
+	.external_power_changed	= pm860x_external_power_changed,
+};
+
 static int pm860x_battery_probe(struct platform_device *pdev)
 {
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
@@ -936,14 +945,6 @@
 
 	pm860x_init_battery(info);
 
-	info->battery.name = "battery-monitor";
-	info->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	info->battery.properties = pm860x_batt_props;
-	info->battery.num_properties = ARRAY_SIZE(pm860x_batt_props);
-	info->battery.get_property = pm860x_batt_get_prop;
-	info->battery.set_property = pm860x_batt_set_prop;
-	info->battery.external_power_changed = pm860x_external_power_changed;
-
 	if (pdata && pdata->max_capacity)
 		info->max_capacity = pdata->max_capacity;
 	else
@@ -953,10 +954,11 @@
 	else
 		info->resistor = 300;	/* set default internal resistor */
 
-	ret = power_supply_register(&pdev->dev, &info->battery);
-	if (ret)
-		return ret;
-	info->battery.dev->parent = &pdev->dev;
+	info->battery = power_supply_register(&pdev->dev, &pm860x_battery_desc,
+					      NULL);
+	if (IS_ERR(info->battery))
+		return PTR_ERR(info->battery);
+	info->battery->dev.parent = &pdev->dev;
 
 	ret = request_threaded_irq(info->irq_cc, NULL,
 				pm860x_coulomb_handler, IRQF_ONESHOT,
@@ -981,7 +983,7 @@
 out_coulomb:
 	free_irq(info->irq_cc, info);
 out_reg:
-	power_supply_unregister(&info->battery);
+	power_supply_unregister(info->battery);
 	return ret;
 }
 
@@ -991,7 +993,7 @@
 
 	free_irq(info->irq_batt, info);
 	free_irq(info->irq_cc, info);
-	power_supply_unregister(&info->battery);
+	power_supply_unregister(info->battery);
 	return 0;
 }
 
diff --git a/drivers/power/88pm860x_charger.c b/drivers/power/88pm860x_charger.c
index 734ec4a..0e448c6 100644
--- a/drivers/power/88pm860x_charger.c
+++ b/drivers/power/88pm860x_charger.c
@@ -102,7 +102,7 @@
 	struct i2c_client *i2c_8606;
 	struct device *dev;
 
-	struct power_supply usb;
+	struct power_supply *usb;
 	struct mutex lock;
 	int irq_nums;
 	int irq[7];
@@ -296,14 +296,20 @@
 	psy = power_supply_get_by_name(pm860x_supplied_to[0]);
 	if (!psy)
 		return -EINVAL;
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_VOLTAGE_NOW, &data);
-	if (ret)
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_VOLTAGE_NOW,
+			&data);
+	if (ret) {
+		power_supply_put(psy);
 		return ret;
+	}
 	vbatt = data.intval / 1000;
 
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_PRESENT, &data);
-	if (ret)
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_PRESENT, &data);
+	if (ret) {
+		power_supply_put(psy);
 		return ret;
+	}
+	power_supply_put(psy);
 
 	mutex_lock(&info->lock);
 	info->present = data.intval;
@@ -414,7 +420,7 @@
 
 	set_charging_fsm(info);
 
-	power_supply_changed(&info->usb);
+	power_supply_changed(info->usb);
 out:
 	return IRQ_HANDLED;
 }
@@ -430,7 +436,7 @@
 	psy = power_supply_get_by_name(pm860x_supplied_to[0]);
 	if (!psy)
 		goto out;
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_TEMP, &temp);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &temp);
 	if (ret)
 		goto out;
 	value = temp.intval / 10;
@@ -446,6 +452,7 @@
 
 	set_charging_fsm(info);
 out:
+	power_supply_put(psy);
 	return IRQ_HANDLED;
 }
 
@@ -485,9 +492,10 @@
 	psy = power_supply_get_by_name(pm860x_supplied_to[0]);
 	if (!psy)
 		goto out;
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_VOLTAGE_NOW,
+			&val);
 	if (ret)
-		goto out;
+		goto out_psy_put;
 	vbatt = val.intval / 1000;
 	/*
 	 * CHG_DONE interrupt is faster than CHG_DET interrupt when
@@ -498,10 +506,13 @@
 	 */
 	ret = pm860x_reg_read(info->i2c, PM8607_STATUS_2);
 	if (ret < 0)
-		goto out;
+		goto out_psy_put;
 	if (vbatt > CHARGE_THRESHOLD && ret & STATUS2_CHG)
-		psy->set_property(psy, POWER_SUPPLY_PROP_CHARGE_FULL, &val);
+		power_supply_set_property(psy, POWER_SUPPLY_PROP_CHARGE_FULL,
+				&val);
 
+out_psy_put:
+	power_supply_put(psy);
 out:
 	mutex_unlock(&info->lock);
 	dev_dbg(info->dev, "%s, Allowed: %d\n", __func__, info->allowed);
@@ -584,8 +595,7 @@
 			       enum power_supply_property psp,
 			       union power_supply_propval *val)
 {
-	struct pm860x_charger_info *info =
-	    dev_get_drvdata(psy->dev->parent);
+	struct pm860x_charger_info *info = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -645,9 +655,18 @@
 	{ "vchg", pm860x_vchg_handler },
 };
 
+static const struct power_supply_desc pm860x_charger_desc = {
+	.name		= "usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= pm860x_usb_props,
+	.num_properties	= ARRAY_SIZE(pm860x_usb_props),
+	.get_property	= pm860x_usb_get_prop,
+};
+
 static int pm860x_charger_probe(struct platform_device *pdev)
 {
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
+	struct power_supply_config psy_cfg = {};
 	struct pm860x_charger_info *info;
 	int ret;
 	int count;
@@ -685,16 +704,15 @@
 	mutex_init(&info->lock);
 	platform_set_drvdata(pdev, info);
 
-	info->usb.name = "usb";
-	info->usb.type = POWER_SUPPLY_TYPE_USB;
-	info->usb.supplied_to = pm860x_supplied_to;
-	info->usb.num_supplicants = ARRAY_SIZE(pm860x_supplied_to);
-	info->usb.properties = pm860x_usb_props;
-	info->usb.num_properties = ARRAY_SIZE(pm860x_usb_props);
-	info->usb.get_property = pm860x_usb_get_prop;
-	ret = power_supply_register(&pdev->dev, &info->usb);
-	if (ret)
+	psy_cfg.drv_data = info;
+	psy_cfg.supplied_to = pm860x_supplied_to;
+	psy_cfg.num_supplicants = ARRAY_SIZE(pm860x_supplied_to);
+	info->usb = power_supply_register(&pdev->dev, &pm860x_charger_desc,
+					  &psy_cfg);
+	if (IS_ERR(info->usb)) {
+		ret = PTR_ERR(info->usb);
 		goto out;
+	}
 
 	pm860x_init_charger(info);
 
@@ -711,7 +729,7 @@
 	return 0;
 
 out_irq:
-	power_supply_unregister(&info->usb);
+	power_supply_unregister(info->usb);
 	while (--i >= 0)
 		free_irq(info->irq[i], info);
 out:
@@ -723,7 +741,7 @@
 	struct pm860x_charger_info *info = platform_get_drvdata(pdev);
 	int i;
 
-	power_supply_unregister(&info->usb);
+	power_supply_unregister(info->usb);
 	free_irq(info->irq[0], info);
 	for (i = 0; i < info->irq_nums; i++)
 		free_irq(info->irq[i], info);
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 27b751b..4091fb0 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -192,6 +192,27 @@
 	  Say Y here to enable support for batteries charger integrated into
 	  DA9052 PMIC.
 
+config CHARGER_DA9150
+	tristate "Dialog Semiconductor DA9150 Charger support"
+	depends on MFD_DA9150
+	depends on DA9150_GPADC
+	depends on IIO
+	help
+	  Say Y here to enable support for charger unit of the DA9150
+	  Integrated Charger & Fuel-Gauge IC.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called da9150-charger.
+
+config AXP288_FUEL_GAUGE
+	tristate "X-Powers AXP288 Fuel Gauge"
+	depends on MFD_AXP20X && IIO
+	help
+	  Say yes here to have support for X-Power power management IC (PMIC)
+	  Fuel Gauge. The device provides battery statistics and status
+	  monitoring as well as alerts for battery over/under voltage and
+	  over/under temperature.
+
 config BATTERY_MAX17040
 	tristate "Maxim MAX17040 Fuel Gauge"
 	depends on I2C
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 36f9e0d..b7b0181 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -1,4 +1,4 @@
-ccflags-$(CONFIG_POWER_SUPPLY_DEBUG) := -DDEBUG
+subdir-ccflags-$(CONFIG_POWER_SUPPLY_DEBUG) := -DDEBUG
 
 power_supply-y				:= power_supply_core.o
 power_supply-$(CONFIG_SYSFS)		+= power_supply_sysfs.o
@@ -32,6 +32,7 @@
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
 obj-$(CONFIG_BATTERY_DA9052)	+= da9052-battery.o
+obj-$(CONFIG_CHARGER_DA9150)	+= da9150-charger.o
 obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
 obj-$(CONFIG_BATTERY_MAX17042)	+= max17042_battery.o
 obj-$(CONFIG_BATTERY_Z2)	+= z2_battery.o
@@ -62,3 +63,4 @@
 obj-$(CONFIG_CHARGER_SMB347)	+= smb347-charger.o
 obj-$(CONFIG_CHARGER_TPS65090)	+= tps65090-charger.o
 obj-$(CONFIG_POWER_RESET)	+= reset/
+obj-$(CONFIG_AXP288_FUEL_GAUGE) += axp288_fuel_gauge.o
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 4ebf7b0..8f8044e 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -45,9 +45,6 @@
 #define BTEMP_BATCTRL_CURR_SRC_60UA	60
 #define BTEMP_BATCTRL_CURR_SRC_120UA	120
 
-#define to_ab8500_btemp_device_info(x) container_of((x), \
-	struct ab8500_btemp, btemp_psy);
-
 /**
  * struct ab8500_btemp_interrupts - ab8500 interrupts
  * @name:	name of the interrupt
@@ -102,7 +99,7 @@
 	struct ab8500_gpadc *gpadc;
 	struct ab8500_fg *fg;
 	struct abx500_bm_data *bm;
-	struct power_supply btemp_psy;
+	struct power_supply *btemp_psy;
 	struct ab8500_btemp_events events;
 	struct ab8500_btemp_ranges btemp_ranges;
 	struct workqueue_struct *btemp_wq;
@@ -654,14 +651,14 @@
 		if ((di->bat_temp != di->prev_bat_temp) || !di->initialized) {
 			di->initialized = true;
 			di->bat_temp = bat_temp;
-			power_supply_changed(&di->btemp_psy);
+			power_supply_changed(di->btemp_psy);
 		}
 	} else if (bat_temp < di->prev_bat_temp) {
 		di->bat_temp--;
-		power_supply_changed(&di->btemp_psy);
+		power_supply_changed(di->btemp_psy);
 	} else if (bat_temp > di->prev_bat_temp) {
 		di->bat_temp++;
-		power_supply_changed(&di->btemp_psy);
+		power_supply_changed(di->btemp_psy);
 	}
 	di->prev_bat_temp = bat_temp;
 
@@ -689,7 +686,7 @@
 	dev_err(di->dev, "Battery removal detected!\n");
 
 	di->events.batt_rem = true;
-	power_supply_changed(&di->btemp_psy);
+	power_supply_changed(di->btemp_psy);
 
 	return IRQ_HANDLED;
 }
@@ -715,7 +712,7 @@
 		di->events.btemp_high = false;
 		di->events.btemp_medhigh = false;
 		di->events.btemp_lowmed = false;
-		power_supply_changed(&di->btemp_psy);
+		power_supply_changed(di->btemp_psy);
 	}
 
 	return IRQ_HANDLED;
@@ -738,7 +735,7 @@
 	di->events.btemp_medhigh = false;
 	di->events.btemp_lowmed = false;
 	di->events.btemp_low = false;
-	power_supply_changed(&di->btemp_psy);
+	power_supply_changed(di->btemp_psy);
 
 	return IRQ_HANDLED;
 }
@@ -760,7 +757,7 @@
 	di->events.btemp_medhigh = false;
 	di->events.btemp_high = false;
 	di->events.btemp_low = false;
-	power_supply_changed(&di->btemp_psy);
+	power_supply_changed(di->btemp_psy);
 
 	return IRQ_HANDLED;
 }
@@ -782,7 +779,7 @@
 	di->events.btemp_lowmed = false;
 	di->events.btemp_high = false;
 	di->events.btemp_low = false;
-	power_supply_changed(&di->btemp_psy);
+	power_supply_changed(di->btemp_psy);
 
 	return IRQ_HANDLED;
 }
@@ -884,9 +881,7 @@
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	struct ab8500_btemp *di;
-
-	di = to_ab8500_btemp_device_info(psy);
+	struct ab8500_btemp *di = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_PRESENT:
@@ -919,14 +914,14 @@
 
 	psy = (struct power_supply *)data;
 	ext = dev_get_drvdata(dev);
-	di = to_ab8500_btemp_device_info(psy);
+	di = power_supply_get_drvdata(psy);
 
 	/*
 	 * For all psy where the name of your driver
 	 * appears in any supplied_to
 	 */
 	for (i = 0; i < ext->num_supplicants; i++) {
-		if (!strcmp(ext->supplied_to[i], psy->name))
+		if (!strcmp(ext->supplied_to[i], psy->desc->name))
 			psy_found = true;
 	}
 
@@ -934,16 +929,16 @@
 		return 0;
 
 	/* Go through all properties for the psy */
-	for (j = 0; j < ext->num_properties; j++) {
+	for (j = 0; j < ext->desc->num_properties; j++) {
 		enum power_supply_property prop;
-		prop = ext->properties[j];
+		prop = ext->desc->properties[j];
 
-		if (ext->get_property(ext, prop, &ret))
+		if (power_supply_get_property(ext, prop, &ret))
 			continue;
 
 		switch (prop) {
 		case POWER_SUPPLY_PROP_PRESENT:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_MAINS:
 				/* AC disconnected */
 				if (!ret.intval && di->events.ac_conn) {
@@ -990,10 +985,10 @@
  */
 static void ab8500_btemp_external_power_changed(struct power_supply *psy)
 {
-	struct ab8500_btemp *di = to_ab8500_btemp_device_info(psy);
+	struct ab8500_btemp *di = power_supply_get_drvdata(psy);
 
 	class_for_each_device(power_supply_class, NULL,
-		&di->btemp_psy, ab8500_btemp_get_ext_psy_data);
+		di->btemp_psy, ab8500_btemp_get_ext_psy_data);
 }
 
 /* ab8500 btemp driver interrupts and their respective isr */
@@ -1044,7 +1039,7 @@
 	destroy_workqueue(di->btemp_wq);
 
 	flush_scheduled_work();
-	power_supply_unregister(&di->btemp_psy);
+	power_supply_unregister(di->btemp_psy);
 
 	return 0;
 }
@@ -1054,10 +1049,20 @@
 	"ab8500_fg",
 };
 
+static const struct power_supply_desc ab8500_btemp_desc = {
+	.name			= "ab8500_btemp",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= ab8500_btemp_props,
+	.num_properties		= ARRAY_SIZE(ab8500_btemp_props),
+	.get_property		= ab8500_btemp_get_property,
+	.external_power_changed	= ab8500_btemp_external_power_changed,
+};
+
 static int ab8500_btemp_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct abx500_bm_data *plat = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct ab8500_btemp *di;
 	int irq, i, ret = 0;
 	u8 val;
@@ -1089,17 +1094,9 @@
 
 	di->initialized = false;
 
-	/* BTEMP supply */
-	di->btemp_psy.name = "ab8500_btemp";
-	di->btemp_psy.type = POWER_SUPPLY_TYPE_BATTERY;
-	di->btemp_psy.properties = ab8500_btemp_props;
-	di->btemp_psy.num_properties = ARRAY_SIZE(ab8500_btemp_props);
-	di->btemp_psy.get_property = ab8500_btemp_get_property;
-	di->btemp_psy.supplied_to = supply_interface;
-	di->btemp_psy.num_supplicants = ARRAY_SIZE(supply_interface);
-	di->btemp_psy.external_power_changed =
-		ab8500_btemp_external_power_changed;
-
+	psy_cfg.supplied_to = supply_interface;
+	psy_cfg.num_supplicants = ARRAY_SIZE(supply_interface);
+	psy_cfg.drv_data = di;
 
 	/* Create a work queue for the btemp */
 	di->btemp_wq =
@@ -1140,9 +1137,11 @@
 	}
 
 	/* Register BTEMP power supply class */
-	ret = power_supply_register(di->dev, &di->btemp_psy);
-	if (ret) {
+	di->btemp_psy = power_supply_register(di->dev, &ab8500_btemp_desc,
+					      &psy_cfg);
+	if (IS_ERR(di->btemp_psy)) {
 		dev_err(di->dev, "failed to register BTEMP psy\n");
+		ret = PTR_ERR(di->btemp_psy);
 		goto free_btemp_wq;
 	}
 
@@ -1171,7 +1170,7 @@
 	return ret;
 
 free_irq:
-	power_supply_unregister(&di->btemp_psy);
+	power_supply_unregister(di->btemp_psy);
 
 	/* We also have to free all successfully registered irqs */
 	for (i = i - 1; i >= 0; i--) {
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 8c8d170..e388171 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -435,7 +435,7 @@
 		if (!connected)
 			di->flags.vbus_drop_end = false;
 
-		sysfs_notify(&di->usb_chg.psy.dev->kobj, NULL, "present");
+		sysfs_notify(&di->usb_chg.psy->dev.kobj, NULL, "present");
 
 		if (connected) {
 			mutex_lock(&di->charger_attached_mutex);
@@ -1516,7 +1516,7 @@
 
 		dev_dbg(di->dev, "%s Disabled AC charging\n", __func__);
 	}
-	ab8500_power_supply_changed(di, &di->ac_chg.psy);
+	ab8500_power_supply_changed(di, di->ac_chg.psy);
 
 	return ret;
 }
@@ -1672,7 +1672,7 @@
 		cancel_delayed_work(&di->check_vbat_work);
 
 	}
-	ab8500_power_supply_changed(di, &di->usb_chg.psy);
+	ab8500_power_supply_changed(di, di->usb_chg.psy);
 
 	return ret;
 }
@@ -1811,9 +1811,9 @@
 	int ret;
 	struct ab8500_charger *di;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_MAINS)
 		di = to_ab8500_charger_ac_device_info(charger);
-	else if (charger->psy.type == POWER_SUPPLY_TYPE_USB)
+	else if (charger->psy->desc->type == POWER_SUPPLY_TYPE_USB)
 		di = to_ab8500_charger_usb_device_info(charger);
 	else
 		return -ENXIO;
@@ -1839,9 +1839,9 @@
 	int ret;
 	struct ab8500_charger *di;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_MAINS)
 		di = to_ab8500_charger_ac_device_info(charger);
-	else if (charger->psy.type == POWER_SUPPLY_TYPE_USB)
+	else if (charger->psy->desc->type == POWER_SUPPLY_TYPE_USB)
 		di = to_ab8500_charger_usb_device_info(charger);
 	else
 		return -ENXIO;
@@ -1879,7 +1879,7 @@
 	int ret;
 	struct ab8500_charger *di;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_USB)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_USB)
 		di = to_ab8500_charger_usb_device_info(charger);
 	else
 		return -ENXIO;
@@ -1910,7 +1910,7 @@
 	int ret;
 	struct ab8500_charger *di;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_USB)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_USB)
 		di = to_ab8500_charger_usb_device_info(charger);
 	else
 		return -ENXIO;
@@ -1937,7 +1937,7 @@
 	struct ux500_charger *usb_chg;
 
 	usb_chg = (struct ux500_charger *)data;
-	psy = &usb_chg->psy;
+	psy = usb_chg->psy;
 
 	di = to_ab8500_charger_usb_device_info(usb_chg);
 
@@ -1945,7 +1945,7 @@
 
 	/* For all psy where the driver name appears in any supplied_to */
 	for (i = 0; i < ext->num_supplicants; i++) {
-		if (!strcmp(ext->supplied_to[i], psy->name))
+		if (!strcmp(ext->supplied_to[i], psy->desc->name))
 			psy_found = true;
 	}
 
@@ -1953,16 +1953,16 @@
 		return 0;
 
 	/* Go through all properties for the psy */
-	for (j = 0; j < ext->num_properties; j++) {
+	for (j = 0; j < ext->desc->num_properties; j++) {
 		enum power_supply_property prop;
-		prop = ext->properties[j];
+		prop = ext->desc->properties[j];
 
-		if (ext->get_property(ext, prop, &ret))
+		if (power_supply_get_property(ext, prop, &ret))
 			continue;
 
 		switch (prop) {
 		case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				di->vbat = ret.intval / 1000;
 				break;
@@ -1993,7 +1993,7 @@
 		struct ab8500_charger, check_vbat_work.work);
 
 	class_for_each_device(power_supply_class, NULL,
-		&di->usb_chg.psy, ab8500_charger_get_ext_psy_data);
+		di->usb_chg.psy, ab8500_charger_get_ext_psy_data);
 
 	/* First run old_vbat is 0. */
 	if (di->old_vbat == 0)
@@ -2009,7 +2009,7 @@
 			di->vbat, di->old_vbat);
 		ab8500_charger_set_vbus_in_curr(di,
 					di->max_usb_in_curr.usb_type_max);
-		power_supply_changed(&di->usb_chg.psy);
+		power_supply_changed(di->usb_chg.psy);
 	}
 
 	di->old_vbat = di->vbat;
@@ -2049,7 +2049,7 @@
 		}
 		if (!(reg_value & MAIN_CH_NOK)) {
 			di->flags.mainextchnotok = false;
-			ab8500_power_supply_changed(di, &di->ac_chg.psy);
+			ab8500_power_supply_changed(di, di->ac_chg.psy);
 		}
 	}
 	if (di->flags.vbus_ovv) {
@@ -2062,7 +2062,7 @@
 		}
 		if (!(reg_value & VBUS_OVV_TH)) {
 			di->flags.vbus_ovv = false;
-			ab8500_power_supply_changed(di, &di->usb_chg.psy);
+			ab8500_power_supply_changed(di, di->usb_chg.psy);
 		}
 	}
 	/* If we still have a failure, schedule a new check */
@@ -2132,8 +2132,8 @@
 		di->ac.charger_connected = 0;
 	}
 
-	ab8500_power_supply_changed(di, &di->ac_chg.psy);
-	sysfs_notify(&di->ac_chg.psy.dev->kobj, NULL, "present");
+	ab8500_power_supply_changed(di, di->ac_chg.psy);
+	sysfs_notify(&di->ac_chg.psy->dev.kobj, NULL, "present");
 }
 
 static void ab8500_charger_usb_attached_work(struct work_struct *work)
@@ -2240,7 +2240,7 @@
 		dev_dbg(di->dev, "%s di->vbus_detected = false\n", __func__);
 		di->vbus_detected = false;
 		ab8500_charger_set_usb_connected(di, false);
-		ab8500_power_supply_changed(di, &di->usb_chg.psy);
+		ab8500_power_supply_changed(di, di->usb_chg.psy);
 	} else {
 		dev_dbg(di->dev, "%s di->vbus_detected = true\n", __func__);
 		di->vbus_detected = true;
@@ -2250,7 +2250,7 @@
 			if (!ret) {
 				ab8500_charger_set_usb_connected(di, true);
 				ab8500_power_supply_changed(di,
-							    &di->usb_chg.psy);
+							    di->usb_chg.psy);
 			}
 		} else {
 			/*
@@ -2267,7 +2267,7 @@
 					ab8500_charger_set_usb_connected(di,
 						true);
 					ab8500_power_supply_changed(di,
-						&di->usb_chg.psy);
+						di->usb_chg.psy);
 				}
 			}
 		}
@@ -2295,7 +2295,7 @@
 	}
 
 	ab8500_charger_set_usb_connected(di, true);
-	ab8500_power_supply_changed(di, &di->usb_chg.psy);
+	ab8500_power_supply_changed(di, di->usb_chg.psy);
 }
 
 /**
@@ -2393,7 +2393,7 @@
 	if (!(detected_chargers & USB_PW_CONN)) {
 		di->vbus_detected = false;
 		ab8500_charger_set_usb_connected(di, false);
-		ab8500_power_supply_changed(di, &di->usb_chg.psy);
+		ab8500_power_supply_changed(di, di->usb_chg.psy);
 		return;
 	}
 
@@ -2404,7 +2404,7 @@
 		if (ret == -ENXIO) {
 			/* No valid charger type detected */
 			ab8500_charger_set_usb_connected(di, false);
-			ab8500_power_supply_changed(di, &di->usb_chg.psy);
+			ab8500_power_supply_changed(di, di->usb_chg.psy);
 		}
 		return;
 	}
@@ -2463,7 +2463,7 @@
 	case AB8500_BM_USB_STATE_SUSPEND:
 	case AB8500_BM_USB_STATE_MAX:
 		ab8500_charger_set_usb_connected(di, false);
-		ab8500_power_supply_changed(di, &di->usb_chg.psy);
+		ab8500_power_supply_changed(di, di->usb_chg.psy);
 		break;
 
 	case AB8500_BM_USB_STATE_RESUME:
@@ -2486,7 +2486,7 @@
 				return;
 
 			ab8500_charger_set_usb_connected(di, true);
-			ab8500_power_supply_changed(di, &di->usb_chg.psy);
+			ab8500_power_supply_changed(di, di->usb_chg.psy);
 		}
 		break;
 
@@ -2530,7 +2530,7 @@
 	}
 
 	if (prev_status != di->flags.usbchargernotok)
-		ab8500_power_supply_changed(di, &di->usb_chg.psy);
+		ab8500_power_supply_changed(di, di->usb_chg.psy);
 }
 
 /**
@@ -2560,7 +2560,7 @@
 	else
 		di->flags.main_thermal_prot = false;
 
-	ab8500_power_supply_changed(di, &di->ac_chg.psy);
+	ab8500_power_supply_changed(di, di->ac_chg.psy);
 }
 
 /**
@@ -2590,7 +2590,7 @@
 	else
 		di->flags.usb_thermal_prot = false;
 
-	ab8500_power_supply_changed(di, &di->usb_chg.psy);
+	ab8500_power_supply_changed(di, di->usb_chg.psy);
 }
 
 /**
@@ -2651,7 +2651,7 @@
 
 	dev_dbg(di->dev, "Main charger not ok\n");
 	di->flags.mainextchnotok = true;
-	ab8500_power_supply_changed(di, &di->ac_chg.psy);
+	ab8500_power_supply_changed(di, di->ac_chg.psy);
 
 	/* Schedule a new HW failure check */
 	queue_delayed_work(di->charger_wq, &di->check_hw_failure_work, 0);
@@ -2880,11 +2880,11 @@
 	 */
 	if (di->ac.charger_online) {
 		di->ac.wd_expired = true;
-		ab8500_power_supply_changed(di, &di->ac_chg.psy);
+		ab8500_power_supply_changed(di, di->ac_chg.psy);
 	}
 	if (di->usb.charger_online) {
 		di->usb.wd_expired = true;
-		ab8500_power_supply_changed(di, &di->usb_chg.psy);
+		ab8500_power_supply_changed(di, di->usb_chg.psy);
 	}
 
 	return IRQ_HANDLED;
@@ -2927,7 +2927,7 @@
 
 	dev_dbg(di->dev, "VBUS overvoltage detected\n");
 	di->flags.vbus_ovv = true;
-	ab8500_power_supply_changed(di, &di->usb_chg.psy);
+	ab8500_power_supply_changed(di, di->usb_chg.psy);
 
 	/* Schedule a new HW failure check */
 	queue_delayed_work(di->charger_wq, &di->check_hw_failure_work, 0);
@@ -3428,10 +3428,10 @@
 
 	flush_scheduled_work();
 	if (di->usb_chg.enabled)
-		power_supply_unregister(&di->usb_chg.psy);
+		power_supply_unregister(di->usb_chg.psy);
 
 	if (di->ac_chg.enabled && !di->ac_chg.external)
-		power_supply_unregister(&di->ac_chg.psy);
+		power_supply_unregister(di->ac_chg.psy);
 
 	return 0;
 }
@@ -3442,10 +3442,27 @@
 	"ab8500_btemp",
 };
 
+static const struct power_supply_desc ab8500_ac_chg_desc = {
+	.name		= "ab8500_ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= ab8500_charger_ac_props,
+	.num_properties	= ARRAY_SIZE(ab8500_charger_ac_props),
+	.get_property	= ab8500_charger_ac_get_property,
+};
+
+static const struct power_supply_desc ab8500_usb_chg_desc = {
+	.name		= "ab8500_usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= ab8500_charger_usb_props,
+	.num_properties	= ARRAY_SIZE(ab8500_charger_usb_props),
+	.get_property	= ab8500_charger_usb_get_property,
+};
+
 static int ab8500_charger_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct abx500_bm_data *plat = pdev->dev.platform_data;
+	struct power_supply_config ac_psy_cfg = {}, usb_psy_cfg = {};
 	struct ab8500_charger *di;
 	int irq, i, charger_status, ret = 0, ch_stat;
 
@@ -3483,15 +3500,15 @@
 	di->autopower = false;
 	di->invalid_charger_detect_state = 0;
 
+	/* AC and USB supply config */
+	ac_psy_cfg.supplied_to = supply_interface;
+	ac_psy_cfg.num_supplicants = ARRAY_SIZE(supply_interface);
+	ac_psy_cfg.drv_data = &di->ac_chg;
+	usb_psy_cfg.supplied_to = supply_interface;
+	usb_psy_cfg.num_supplicants = ARRAY_SIZE(supply_interface);
+	usb_psy_cfg.drv_data = &di->usb_chg;
+
 	/* AC supply */
-	/* power_supply base class */
-	di->ac_chg.psy.name = "ab8500_ac";
-	di->ac_chg.psy.type = POWER_SUPPLY_TYPE_MAINS;
-	di->ac_chg.psy.properties = ab8500_charger_ac_props;
-	di->ac_chg.psy.num_properties = ARRAY_SIZE(ab8500_charger_ac_props);
-	di->ac_chg.psy.get_property = ab8500_charger_ac_get_property;
-	di->ac_chg.psy.supplied_to = supply_interface;
-	di->ac_chg.psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	/* ux500_charger sub-class */
 	di->ac_chg.ops.enable = &ab8500_charger_ac_en;
 	di->ac_chg.ops.check_enable = &ab8500_charger_ac_check_enable;
@@ -3511,14 +3528,6 @@
 			&charger_notifier_list, &charger_nb);
 
 	/* USB supply */
-	/* power_supply base class */
-	di->usb_chg.psy.name = "ab8500_usb";
-	di->usb_chg.psy.type = POWER_SUPPLY_TYPE_USB;
-	di->usb_chg.psy.properties = ab8500_charger_usb_props;
-	di->usb_chg.psy.num_properties = ARRAY_SIZE(ab8500_charger_usb_props);
-	di->usb_chg.psy.get_property = ab8500_charger_usb_get_property;
-	di->usb_chg.psy.supplied_to = supply_interface;
-	di->usb_chg.psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	/* ux500_charger sub-class */
 	di->usb_chg.ops.enable = &ab8500_charger_usb_en;
 	di->usb_chg.ops.check_enable = &ab8500_charger_usb_check_enable;
@@ -3616,18 +3625,24 @@
 
 	/* Register AC charger class */
 	if (di->ac_chg.enabled) {
-		ret = power_supply_register(di->dev, &di->ac_chg.psy);
-		if (ret) {
+		di->ac_chg.psy = power_supply_register(di->dev,
+						       &ab8500_ac_chg_desc,
+						       &ac_psy_cfg);
+		if (IS_ERR(di->ac_chg.psy)) {
 			dev_err(di->dev, "failed to register AC charger\n");
+			ret = PTR_ERR(di->ac_chg.psy);
 			goto free_charger_wq;
 		}
 	}
 
 	/* Register USB charger class */
 	if (di->usb_chg.enabled) {
-		ret = power_supply_register(di->dev, &di->usb_chg.psy);
-		if (ret) {
+		di->usb_chg.psy = power_supply_register(di->dev,
+							&ab8500_usb_chg_desc,
+							&usb_psy_cfg);
+		if (IS_ERR(di->usb_chg.psy)) {
 			dev_err(di->dev, "failed to register USB charger\n");
+			ret = PTR_ERR(di->usb_chg.psy);
 			goto free_ac;
 		}
 	}
@@ -3650,8 +3665,8 @@
 	if (charger_status & AC_PW_CONN) {
 		di->ac.charger_connected = 1;
 		di->ac_conn = true;
-		ab8500_power_supply_changed(di, &di->ac_chg.psy);
-		sysfs_notify(&di->ac_chg.psy.dev->kobj, NULL, "present");
+		ab8500_power_supply_changed(di, di->ac_chg.psy);
+		sysfs_notify(&di->ac_chg.psy->dev.kobj, NULL, "present");
 	}
 
 	if (charger_status & USB_PW_CONN) {
@@ -3712,10 +3727,10 @@
 	usb_put_phy(di->usb_phy);
 free_usb:
 	if (di->usb_chg.enabled)
-		power_supply_unregister(&di->usb_chg.psy);
+		power_supply_unregister(di->usb_chg.psy);
 free_ac:
 	if (di->ac_chg.enabled)
-		power_supply_unregister(&di->ac_chg.psy);
+		power_supply_unregister(di->ac_chg.psy);
 free_charger_wq:
 	destroy_workqueue(di->charger_wq);
 	return ret;
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index c908658..3830dad 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -57,9 +57,6 @@
 #define interpolate(x, x1, y1, x2, y2) \
 	((y1) + ((((y2) - (y1)) * ((x) - (x1))) / ((x2) - (x1))));
 
-#define to_ab8500_fg_device_info(x) container_of((x), \
-	struct ab8500_fg, fg_psy);
-
 /**
  * struct ab8500_fg_interrupts - ab8500 fg interupts
  * @name:	name of the interrupt
@@ -229,7 +226,7 @@
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
 	struct abx500_bm_data *bm;
-	struct power_supply fg_psy;
+	struct power_supply *fg_psy;
 	struct workqueue_struct *fg_wq;
 	struct delayed_work fg_periodic_work;
 	struct delayed_work fg_low_bat_work;
@@ -622,14 +619,14 @@
 	u8 low, high;
 	int val;
 	int ret;
-	int timeout;
+	unsigned long timeout;
 
 	if (!completion_done(&di->ab8500_fg_complete)) {
 		timeout = wait_for_completion_timeout(
 			&di->ab8500_fg_complete,
 			INS_CURR_TIMEOUT);
 		dev_dbg(di->dev, "Finalize time: %d ms\n",
-			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
+			jiffies_to_msecs(INS_CURR_TIMEOUT - timeout));
 		if (!timeout) {
 			ret = -ETIME;
 			disable_irq(di->irq);
@@ -716,7 +713,7 @@
 int ab8500_fg_inst_curr_blocking(struct ab8500_fg *di)
 {
 	int ret;
-	int timeout;
+	unsigned long timeout;
 	int res = 0;
 
 	ret = ab8500_fg_inst_curr_start(di);
@@ -731,7 +728,7 @@
 			&di->ab8500_fg_started,
 			INS_CURR_TIMEOUT);
 		dev_dbg(di->dev, "Start time: %d ms\n",
-			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
+			jiffies_to_msecs(INS_CURR_TIMEOUT - timeout));
 		if (!timeout) {
 			ret = -ETIME;
 			dev_err(di->dev, "completion timed out [%d]\n",
@@ -1391,7 +1388,7 @@
 				di->bat_cap.prev_percent,
 				di->bat_cap.cap_scale.scaled_cap);
 		}
-		power_supply_changed(&di->fg_psy);
+		power_supply_changed(di->fg_psy);
 		if (di->flags.fully_charged && di->flags.force_full) {
 			dev_dbg(di->dev, "Battery full, notifying.\n");
 			di->flags.force_full = false;
@@ -1850,7 +1847,7 @@
 		if (!di->flags.bat_ovv) {
 			dev_dbg(di->dev, "Battery OVV\n");
 			di->flags.bat_ovv = true;
-			power_supply_changed(&di->fg_psy);
+			power_supply_changed(di->fg_psy);
 		}
 		/* Not yet recovered from ovv, reschedule this test */
 		queue_delayed_work(di->fg_wq, &di->fg_check_hw_failure_work,
@@ -1858,7 +1855,7 @@
 		} else {
 			dev_dbg(di->dev, "Battery recovered from OVV\n");
 			di->flags.bat_ovv = false;
-			power_supply_changed(&di->fg_psy);
+			power_supply_changed(di->fg_psy);
 	}
 }
 
@@ -2096,9 +2093,7 @@
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	/*
 	 * If battery is identified as unknown and charging of unknown
@@ -2181,14 +2176,14 @@
 
 	psy = (struct power_supply *)data;
 	ext = dev_get_drvdata(dev);
-	di = to_ab8500_fg_device_info(psy);
+	di = power_supply_get_drvdata(psy);
 
 	/*
 	 * For all psy where the name of your driver
 	 * appears in any supplied_to
 	 */
 	for (i = 0; i < ext->num_supplicants; i++) {
-		if (!strcmp(ext->supplied_to[i], psy->name))
+		if (!strcmp(ext->supplied_to[i], psy->desc->name))
 			psy_found = true;
 	}
 
@@ -2196,16 +2191,16 @@
 		return 0;
 
 	/* Go through all properties for the psy */
-	for (j = 0; j < ext->num_properties; j++) {
+	for (j = 0; j < ext->desc->num_properties; j++) {
 		enum power_supply_property prop;
-		prop = ext->properties[j];
+		prop = ext->desc->properties[j];
 
-		if (ext->get_property(ext, prop, &ret))
+		if (power_supply_get_property(ext, prop, &ret))
 			continue;
 
 		switch (prop) {
 		case POWER_SUPPLY_PROP_STATUS:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				switch (ret.intval) {
 				case POWER_SUPPLY_STATUS_UNKNOWN:
@@ -2244,7 +2239,7 @@
 			};
 			break;
 		case POWER_SUPPLY_PROP_TECHNOLOGY:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				if (!di->flags.batt_id_received &&
 				    di->bm->batt_id != BATTERY_UNKNOWN) {
@@ -2274,7 +2269,7 @@
 			}
 			break;
 		case POWER_SUPPLY_PROP_TEMP:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				if (di->flags.batt_id_received)
 					di->bat_temp = ret.intval;
@@ -2399,10 +2394,10 @@
  */
 static void ab8500_fg_external_power_changed(struct power_supply *psy)
 {
-	struct ab8500_fg *di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	class_for_each_device(power_supply_class, NULL,
-		&di->fg_psy, ab8500_fg_get_ext_psy_data);
+		di->fg_psy, ab8500_fg_get_ext_psy_data);
 }
 
 /**
@@ -2580,9 +2575,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 		AB8505_RTC_PCUT_FLAG_TIME_REG, &reg_value);
@@ -2605,9 +2598,7 @@
 	int ret;
 	long unsigned reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	reg_value = simple_strtoul(buf, NULL, 10);
 
@@ -2633,9 +2624,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 		AB8505_RTC_PCUT_MAX_TIME_REG, &reg_value);
@@ -2659,9 +2648,7 @@
 	int ret;
 	int reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	reg_value = simple_strtoul(buf, NULL, 10);
 	if (reg_value > 0x7F) {
@@ -2686,9 +2673,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 		AB8505_RTC_PCUT_RESTART_REG, &reg_value);
@@ -2711,9 +2696,7 @@
 	int ret;
 	int reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	reg_value = simple_strtoul(buf, NULL, 10);
 	if (reg_value > 0xF) {
@@ -2739,9 +2722,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_TIME_REG, &reg_value);
@@ -2764,9 +2745,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_RESTART_REG, &reg_value);
@@ -2789,9 +2768,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_CTL_STATUS_REG, &reg_value);
@@ -2812,9 +2789,7 @@
 	int ret;
 	int reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	reg_value = simple_strtoul(buf, NULL, 10);
 	if (reg_value > 0x1) {
@@ -2840,9 +2815,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_CTL_STATUS_REG,  &reg_value);
@@ -2865,9 +2838,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_DEBOUNCE_REG,  &reg_value);
@@ -2890,9 +2861,7 @@
 	int ret;
 	int reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	reg_value = simple_strtoul(buf, NULL, 10);
 	if (reg_value > 0x7) {
@@ -2917,9 +2886,7 @@
 	int ret;
 	u8 reg_value;
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
+	struct ab8500_fg *di = power_supply_get_drvdata(psy);
 
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 						AB8505_RTC_PCUT_CTL_STATUS_REG, &reg_value);
@@ -2954,44 +2921,38 @@
 		ab8505_powercut_enable_status_read, NULL),
 };
 
-static int ab8500_fg_sysfs_psy_create_attrs(struct device *dev)
+static int ab8500_fg_sysfs_psy_create_attrs(struct ab8500_fg *di)
 {
 	unsigned int i;
-	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
 
 	if (((is_ab8505(di->parent) || is_ab9540(di->parent)) &&
-	     abx500_get_chip_id(dev->parent) >= AB8500_CUT2P0)
+	     abx500_get_chip_id(di->dev) >= AB8500_CUT2P0)
 	    || is_ab8540(di->parent)) {
 		for (i = 0; i < ARRAY_SIZE(ab8505_fg_sysfs_psy_attrs); i++)
-			if (device_create_file(dev,
+			if (device_create_file(&di->fg_psy->dev,
 					       &ab8505_fg_sysfs_psy_attrs[i]))
 				goto sysfs_psy_create_attrs_failed_ab8505;
 	}
 	return 0;
 sysfs_psy_create_attrs_failed_ab8505:
-	dev_err(dev, "Failed creating sysfs psy attrs for ab8505.\n");
+	dev_err(&di->fg_psy->dev, "Failed creating sysfs psy attrs for ab8505.\n");
 	while (i--)
-		device_remove_file(dev, &ab8505_fg_sysfs_psy_attrs[i]);
+		device_remove_file(&di->fg_psy->dev,
+				   &ab8505_fg_sysfs_psy_attrs[i]);
 
 	return -EIO;
 }
 
-static void ab8500_fg_sysfs_psy_remove_attrs(struct device *dev)
+static void ab8500_fg_sysfs_psy_remove_attrs(struct ab8500_fg *di)
 {
 	unsigned int i;
-	struct power_supply *psy = dev_get_drvdata(dev);
-	struct ab8500_fg *di;
-
-	di = to_ab8500_fg_device_info(psy);
 
 	if (((is_ab8505(di->parent) || is_ab9540(di->parent)) &&
-	     abx500_get_chip_id(dev->parent) >= AB8500_CUT2P0)
+	     abx500_get_chip_id(di->dev) >= AB8500_CUT2P0)
 	    || is_ab8540(di->parent)) {
 		for (i = 0; i < ARRAY_SIZE(ab8505_fg_sysfs_psy_attrs); i++)
-			(void)device_remove_file(dev, &ab8505_fg_sysfs_psy_attrs[i]);
+			(void)device_remove_file(&di->fg_psy->dev,
+						 &ab8505_fg_sysfs_psy_attrs[i]);
 	}
 }
 
@@ -3056,17 +3017,20 @@
 	ab8500_fg_sysfs_exit(di);
 
 	flush_scheduled_work();
-	ab8500_fg_sysfs_psy_remove_attrs(di->fg_psy.dev);
-	power_supply_unregister(&di->fg_psy);
+	ab8500_fg_sysfs_psy_remove_attrs(di);
+	power_supply_unregister(di->fg_psy);
 	return ret;
 }
 
 /* ab8500 fg driver interrupts and their respective isr */
-static struct ab8500_fg_interrupts ab8500_fg_irq[] = {
+static struct ab8500_fg_interrupts ab8500_fg_irq_th[] = {
 	{"NCONV_ACCU", ab8500_fg_cc_convend_handler},
 	{"BATT_OVV", ab8500_fg_batt_ovv_handler},
 	{"LOW_BAT_F", ab8500_fg_lowbatf_handler},
 	{"CC_INT_CALIB", ab8500_fg_cc_int_calib_handler},
+};
+
+static struct ab8500_fg_interrupts ab8500_fg_irq_bh[] = {
 	{"CCEOC", ab8500_fg_cc_data_end_handler},
 };
 
@@ -3075,10 +3039,20 @@
 	"ab8500_usb",
 };
 
+static const struct power_supply_desc ab8500_fg_desc = {
+	.name			= "ab8500_fg",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= ab8500_fg_props,
+	.num_properties		= ARRAY_SIZE(ab8500_fg_props),
+	.get_property		= ab8500_fg_get_property,
+	.external_power_changed	= ab8500_fg_external_power_changed,
+};
+
 static int ab8500_fg_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct abx500_bm_data *plat = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct ab8500_fg *di;
 	int i, irq;
 	int ret = 0;
@@ -3110,14 +3084,9 @@
 	di->parent = dev_get_drvdata(pdev->dev.parent);
 	di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 
-	di->fg_psy.name = "ab8500_fg";
-	di->fg_psy.type = POWER_SUPPLY_TYPE_BATTERY;
-	di->fg_psy.properties = ab8500_fg_props;
-	di->fg_psy.num_properties = ARRAY_SIZE(ab8500_fg_props);
-	di->fg_psy.get_property = ab8500_fg_get_property;
-	di->fg_psy.supplied_to = supply_interface;
-	di->fg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
-	di->fg_psy.external_power_changed = ab8500_fg_external_power_changed;
+	psy_cfg.supplied_to = supply_interface;
+	psy_cfg.num_supplicants = ARRAY_SIZE(supply_interface);
+	psy_cfg.drv_data = di;
 
 	di->bat_cap.max_mah_design = MILLI_TO_MICRO *
 		di->bm->bat_type[di->bm->batt_id].charge_full_design;
@@ -3178,9 +3147,10 @@
 	di->flags.batt_id_received = false;
 
 	/* Register FG power supply class */
-	ret = power_supply_register(di->dev, &di->fg_psy);
-	if (ret) {
+	di->fg_psy = power_supply_register(di->dev, &ab8500_fg_desc, &psy_cfg);
+	if (IS_ERR(di->fg_psy)) {
 		dev_err(di->dev, "failed to register FG psy\n");
+		ret = PTR_ERR(di->fg_psy);
 		goto free_inst_curr_wq;
 	}
 
@@ -3194,21 +3164,36 @@
 	init_completion(&di->ab8500_fg_started);
 	init_completion(&di->ab8500_fg_complete);
 
-	/* Register interrupts */
-	for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq); i++) {
-		irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
-		ret = request_threaded_irq(irq, NULL, ab8500_fg_irq[i].isr,
-			IRQF_SHARED | IRQF_NO_SUSPEND,
-			ab8500_fg_irq[i].name, di);
+	/* Register primary interrupt handlers */
+	for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq_th); i++) {
+		irq = platform_get_irq_byname(pdev, ab8500_fg_irq_th[i].name);
+		ret = request_irq(irq, ab8500_fg_irq_th[i].isr,
+				  IRQF_SHARED | IRQF_NO_SUSPEND,
+				  ab8500_fg_irq_th[i].name, di);
 
 		if (ret != 0) {
-			dev_err(di->dev, "failed to request %s IRQ %d: %d\n"
-				, ab8500_fg_irq[i].name, irq, ret);
+			dev_err(di->dev, "failed to request %s IRQ %d: %d\n",
+				ab8500_fg_irq_th[i].name, irq, ret);
 			goto free_irq;
 		}
 		dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
-			ab8500_fg_irq[i].name, irq, ret);
+			ab8500_fg_irq_th[i].name, irq, ret);
 	}
+
+	/* Register threaded interrupt handler */
+	irq = platform_get_irq_byname(pdev, ab8500_fg_irq_bh[0].name);
+	ret = request_threaded_irq(irq, NULL, ab8500_fg_irq_bh[0].isr,
+				IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
+			ab8500_fg_irq_bh[0].name, di);
+
+	if (ret != 0) {
+		dev_err(di->dev, "failed to request %s IRQ %d: %d\n",
+			ab8500_fg_irq_bh[0].name, irq, ret);
+		goto free_irq;
+	}
+	dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
+		ab8500_fg_irq_bh[0].name, irq, ret);
+
 	di->irq = platform_get_irq_byname(pdev, "CCEOC");
 	disable_irq(di->irq);
 	di->nbr_cceoc_irq_cnt = 0;
@@ -3221,7 +3206,7 @@
 		goto free_irq;
 	}
 
-	ret = ab8500_fg_sysfs_psy_create_attrs(di->fg_psy.dev);
+	ret = ab8500_fg_sysfs_psy_create_attrs(di);
 	if (ret) {
 		dev_err(di->dev, "failed to create FG psy\n");
 		ab8500_fg_sysfs_exit(di);
@@ -3243,13 +3228,15 @@
 	return ret;
 
 free_irq:
-	power_supply_unregister(&di->fg_psy);
+	power_supply_unregister(di->fg_psy);
 
-	/* We also have to free all successfully registered irqs */
-	for (i = i - 1; i >= 0; i--) {
-		irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
+	/* We also have to free all registered irqs */
+	for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq_th); i++) {
+		irq = platform_get_irq_byname(pdev, ab8500_fg_irq_th[i].name);
 		free_irq(irq, di);
 	}
+	irq = platform_get_irq_byname(pdev, ab8500_fg_irq_bh[0].name);
+	free_irq(irq, di);
 free_inst_curr_wq:
 	destroy_workqueue(di->fg_wq);
 	return ret;
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index ab54b8d..541f702 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -50,9 +50,6 @@
 #define CHARGALG_CURR_STEP_LOW		0
 #define CHARGALG_CURR_STEP_HIGH	100
 
-#define to_abx500_chargalg_device_info(x) container_of((x), \
-	struct abx500_chargalg, chargalg_psy);
-
 enum abx500_chargers {
 	NO_CHG,
 	AC_CHG,
@@ -256,7 +253,7 @@
 	struct ab8500 *parent;
 	struct abx500_chargalg_current_step_status curr_status;
 	struct abx500_bm_data *bm;
-	struct power_supply chargalg_psy;
+	struct power_supply *chargalg_psy;
 	struct ux500_charger *ac_chg;
 	struct ux500_charger *usb_chg;
 	struct abx500_chargalg_events events;
@@ -695,7 +692,7 @@
 	di->charge_status = POWER_SUPPLY_STATUS_NOT_CHARGING;
 	di->maintenance_chg = false;
 	cancel_delayed_work(&di->chargalg_wd_work);
-	power_supply_changed(&di->chargalg_psy);
+	power_supply_changed(di->chargalg_psy);
 }
 
 /**
@@ -715,7 +712,7 @@
 	di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 	di->maintenance_chg = false;
 	cancel_delayed_work(&di->chargalg_wd_work);
-	power_supply_changed(&di->chargalg_psy);
+	power_supply_changed(di->chargalg_psy);
 }
 
 /**
@@ -842,7 +839,7 @@
 			di->charge_status = POWER_SUPPLY_STATUS_FULL;
 			di->maintenance_chg = true;
 			dev_dbg(di->dev, "EOC reached!\n");
-			power_supply_changed(&di->chargalg_psy);
+			power_supply_changed(di->chargalg_psy);
 		} else {
 			dev_dbg(di->dev,
 				" EOC limit reached for the %d"
@@ -987,10 +984,10 @@
 
 	psy = (struct power_supply *)data;
 	ext = dev_get_drvdata(dev);
-	di = to_abx500_chargalg_device_info(psy);
+	di = power_supply_get_drvdata(psy);
 	/* For all psy where the driver name appears in any supplied_to */
 	for (i = 0; i < ext->num_supplicants; i++) {
-		if (!strcmp(ext->supplied_to[i], psy->name))
+		if (!strcmp(ext->supplied_to[i], psy->desc->name))
 			psy_found = true;
 	}
 	if (!psy_found)
@@ -1001,29 +998,31 @@
 	 * property because of handling that sysfs entry on its own, this is
 	 * the place to get the battery capacity.
 	 */
-	if (!ext->get_property(ext, POWER_SUPPLY_PROP_CAPACITY, &ret)) {
+	if (!power_supply_get_property(ext, POWER_SUPPLY_PROP_CAPACITY, &ret)) {
 		di->batt_data.percent = ret.intval;
 		capacity_updated = true;
 	}
 
 	/* Go through all properties for the psy */
-	for (j = 0; j < ext->num_properties; j++) {
+	for (j = 0; j < ext->desc->num_properties; j++) {
 		enum power_supply_property prop;
-		prop = ext->properties[j];
+		prop = ext->desc->properties[j];
 
-		/* Initialize chargers if not already done */
+		/*
+		 * Initialize chargers if not already done.
+		 * The ab8500_charger*/
 		if (!di->ac_chg &&
-			ext->type == POWER_SUPPLY_TYPE_MAINS)
+			ext->desc->type == POWER_SUPPLY_TYPE_MAINS)
 			di->ac_chg = psy_to_ux500_charger(ext);
 		else if (!di->usb_chg &&
-			ext->type == POWER_SUPPLY_TYPE_USB)
+			ext->desc->type == POWER_SUPPLY_TYPE_USB)
 			di->usb_chg = psy_to_ux500_charger(ext);
 
-		if (ext->get_property(ext, prop, &ret))
+		if (power_supply_get_property(ext, prop, &ret))
 			continue;
 		switch (prop) {
 		case POWER_SUPPLY_PROP_PRESENT:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				/* Battery present */
 				if (ret.intval)
@@ -1070,7 +1069,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_ONLINE:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				break;
 			case POWER_SUPPLY_TYPE_MAINS:
@@ -1115,7 +1114,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_HEALTH:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				break;
 			case POWER_SUPPLY_TYPE_MAINS:
@@ -1198,7 +1197,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				di->batt_data.volt = ret.intval / 1000;
 				break;
@@ -1214,7 +1213,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_VOLTAGE_AVG:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_MAINS:
 				/* AVG is used to indicate when we are
 				 * in CV mode */
@@ -1239,7 +1238,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_TECHNOLOGY:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				if (ret.intval)
 					di->events.batt_unknown = false;
@@ -1257,7 +1256,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_CURRENT_NOW:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_MAINS:
 					di->chg_info.ac_curr =
 						ret.intval / 1000;
@@ -1275,7 +1274,7 @@
 			break;
 
 		case POWER_SUPPLY_PROP_CURRENT_AVG:
-			switch (ext->type) {
+			switch (ext->desc->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
 				di->batt_data.avg_curr = ret.intval / 1000;
 				break;
@@ -1311,7 +1310,7 @@
  */
 static void abx500_chargalg_external_power_changed(struct power_supply *psy)
 {
-	struct abx500_chargalg *di = to_abx500_chargalg_device_info(psy);
+	struct abx500_chargalg *di = power_supply_get_drvdata(psy);
 
 	/*
 	 * Trigger execution of the algorithm instantly and read
@@ -1336,7 +1335,7 @@
 
 	/* Collect data from all power_supply class devices */
 	class_for_each_device(power_supply_class, NULL,
-		&di->chargalg_psy, abx500_chargalg_get_ext_psy_data);
+		di->chargalg_psy, abx500_chargalg_get_ext_psy_data);
 
 	abx500_chargalg_end_of_charge(di);
 	abx500_chargalg_check_temp(di);
@@ -1478,7 +1477,7 @@
 		di->charge_status = POWER_SUPPLY_STATUS_NOT_CHARGING;
 		di->maintenance_chg = false;
 		abx500_chargalg_state_to(di, STATE_SUSPENDED);
-		power_supply_changed(&di->chargalg_psy);
+		power_supply_changed(di->chargalg_psy);
 		/* Intentional fallthrough */
 
 	case STATE_SUSPENDED:
@@ -1576,7 +1575,7 @@
 		di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 		di->eoc_cnt = 0;
 		di->maintenance_chg = false;
-		power_supply_changed(&di->chargalg_psy);
+		power_supply_changed(di->chargalg_psy);
 
 		break;
 
@@ -1624,7 +1623,7 @@
 			di->bm->bat_type[
 				di->bm->batt_id].maint_a_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_A);
-		power_supply_changed(&di->chargalg_psy);
+		power_supply_changed(di->chargalg_psy);
 		/* Intentional fallthrough*/
 
 	case STATE_MAINTENANCE_A:
@@ -1644,7 +1643,7 @@
 			di->bm->bat_type[
 				di->bm->batt_id].maint_b_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_B);
-		power_supply_changed(&di->chargalg_psy);
+		power_supply_changed(di->chargalg_psy);
 		/* Intentional fallthrough*/
 
 	case STATE_MAINTENANCE_B:
@@ -1663,7 +1662,7 @@
 		abx500_chargalg_stop_maintenance_timer(di);
 		di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 		abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH);
-		power_supply_changed(&di->chargalg_psy);
+		power_supply_changed(di->chargalg_psy);
 		/* Intentional fallthrough */
 
 	case STATE_TEMP_LOWHIGH:
@@ -1779,9 +1778,7 @@
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	struct abx500_chargalg *di;
-
-	di = to_abx500_chargalg_device_info(psy);
+	struct abx500_chargalg *di = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -2034,7 +2031,7 @@
 	/* Delete the work queue */
 	destroy_workqueue(di->chargalg_wq);
 
-	power_supply_unregister(&di->chargalg_psy);
+	power_supply_unregister(di->chargalg_psy);
 
 	return 0;
 }
@@ -2043,10 +2040,20 @@
 	"ab8500_fg",
 };
 
+static const struct power_supply_desc abx500_chargalg_desc = {
+	.name			= "abx500_chargalg",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= abx500_chargalg_props,
+	.num_properties		= ARRAY_SIZE(abx500_chargalg_props),
+	.get_property		= abx500_chargalg_get_property,
+	.external_power_changed	= abx500_chargalg_external_power_changed,
+};
+
 static int abx500_chargalg_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct abx500_bm_data *plat = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct abx500_chargalg *di;
 	int ret = 0;
 
@@ -2074,16 +2081,9 @@
 	di->dev = &pdev->dev;
 	di->parent = dev_get_drvdata(pdev->dev.parent);
 
-	/* chargalg supply */
-	di->chargalg_psy.name = "abx500_chargalg";
-	di->chargalg_psy.type = POWER_SUPPLY_TYPE_BATTERY;
-	di->chargalg_psy.properties = abx500_chargalg_props;
-	di->chargalg_psy.num_properties = ARRAY_SIZE(abx500_chargalg_props);
-	di->chargalg_psy.get_property = abx500_chargalg_get_property;
-	di->chargalg_psy.supplied_to = supply_interface;
-	di->chargalg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
-	di->chargalg_psy.external_power_changed =
-		abx500_chargalg_external_power_changed;
+	psy_cfg.supplied_to = supply_interface;
+	psy_cfg.num_supplicants = ARRAY_SIZE(supply_interface);
+	psy_cfg.drv_data = di;
 
 	/* Initilialize safety timer */
 	hrtimer_init(&di->safety_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
@@ -2115,9 +2115,11 @@
 	di->chg_info.prev_conn_chg = -1;
 
 	/* Register chargalg power supply class */
-	ret = power_supply_register(di->dev, &di->chargalg_psy);
-	if (ret) {
+	di->chargalg_psy = power_supply_register(di->dev, &abx500_chargalg_desc,
+						 &psy_cfg);
+	if (IS_ERR(di->chargalg_psy)) {
 		dev_err(di->dev, "failed to register chargalg psy\n");
+		ret = PTR_ERR(di->chargalg_psy);
 		goto free_chargalg_wq;
 	}
 
@@ -2138,7 +2140,7 @@
 	return ret;
 
 free_psy:
-	power_supply_unregister(&di->chargalg_psy);
+	power_supply_unregister(di->chargalg_psy);
 free_chargalg_wq:
 	destroy_workqueue(di->chargalg_wq);
 	return ret;
diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c
index 39763015b..9d1a7fb 100644
--- a/drivers/power/apm_power.c
+++ b/drivers/power/apm_power.c
@@ -15,10 +15,10 @@
 #include <linux/apm-emulation.h>
 
 
-#define PSY_PROP(psy, prop, val) (psy->get_property(psy, \
+#define PSY_PROP(psy, prop, val) (power_supply_get_property(psy, \
 			 POWER_SUPPLY_PROP_##prop, val))
 
-#define _MPSY_PROP(prop, val) (main_battery->get_property(main_battery, \
+#define _MPSY_PROP(prop, val) (power_supply_get_property(main_battery, \
 							 prop, val))
 
 #define MPSY_PROP(prop, val) _MPSY_PROP(POWER_SUPPLY_PROP_##prop, val)
@@ -48,7 +48,7 @@
 
 	bp->bat = dev_get_drvdata(dev);
 
-	if (bp->bat->use_for_apm) {
+	if (bp->bat->desc->use_for_apm) {
 		/* nice, we explicitly asked to report this battery. */
 		bp->main = bp->bat;
 		return 1;
diff --git a/drivers/power/axp288_fuel_gauge.c b/drivers/power/axp288_fuel_gauge.c
new file mode 100644
index 0000000..ca1cc5a
--- /dev/null
+++ b/drivers/power/axp288_fuel_gauge.c
@@ -0,0 +1,1154 @@
+/*
+ * axp288_fuel_gauge.c - Xpower AXP288 PMIC Fuel Gauge Driver
+ *
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/mfd/axp20x.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/iio/consumer.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define CHRG_STAT_BAT_SAFE_MODE		(1 << 3)
+#define CHRG_STAT_BAT_VALID			(1 << 4)
+#define CHRG_STAT_BAT_PRESENT		(1 << 5)
+#define CHRG_STAT_CHARGING			(1 << 6)
+#define CHRG_STAT_PMIC_OTP			(1 << 7)
+
+#define CHRG_CCCV_CC_MASK			0xf     /* 4 bits */
+#define CHRG_CCCV_CC_BIT_POS		0
+#define CHRG_CCCV_CC_OFFSET			200     /* 200mA */
+#define CHRG_CCCV_CC_LSB_RES		200     /* 200mA */
+#define CHRG_CCCV_ITERM_20P			(1 << 4)    /* 20% of CC */
+#define CHRG_CCCV_CV_MASK			0x60        /* 2 bits */
+#define CHRG_CCCV_CV_BIT_POS		5
+#define CHRG_CCCV_CV_4100MV			0x0     /* 4.10V */
+#define CHRG_CCCV_CV_4150MV			0x1     /* 4.15V */
+#define CHRG_CCCV_CV_4200MV			0x2     /* 4.20V */
+#define CHRG_CCCV_CV_4350MV			0x3     /* 4.35V */
+#define CHRG_CCCV_CHG_EN			(1 << 7)
+
+#define CV_4100						4100    /* 4100mV */
+#define CV_4150						4150    /* 4150mV */
+#define CV_4200						4200    /* 4200mV */
+#define CV_4350						4350    /* 4350mV */
+
+#define TEMP_IRQ_CFG_QWBTU			(1 << 0)
+#define TEMP_IRQ_CFG_WBTU			(1 << 1)
+#define TEMP_IRQ_CFG_QWBTO			(1 << 2)
+#define TEMP_IRQ_CFG_WBTO			(1 << 3)
+#define TEMP_IRQ_CFG_MASK			0xf
+
+#define FG_IRQ_CFG_LOWBATT_WL2		(1 << 0)
+#define FG_IRQ_CFG_LOWBATT_WL1		(1 << 1)
+#define FG_IRQ_CFG_LOWBATT_MASK		0x3
+#define LOWBAT_IRQ_STAT_LOWBATT_WL2	(1 << 0)
+#define LOWBAT_IRQ_STAT_LOWBATT_WL1	(1 << 1)
+
+#define FG_CNTL_OCV_ADJ_STAT		(1 << 2)
+#define FG_CNTL_OCV_ADJ_EN			(1 << 3)
+#define FG_CNTL_CAP_ADJ_STAT		(1 << 4)
+#define FG_CNTL_CAP_ADJ_EN			(1 << 5)
+#define FG_CNTL_CC_EN				(1 << 6)
+#define FG_CNTL_GAUGE_EN			(1 << 7)
+
+#define FG_REP_CAP_VALID			(1 << 7)
+#define FG_REP_CAP_VAL_MASK			0x7F
+
+#define FG_DES_CAP1_VALID			(1 << 7)
+#define FG_DES_CAP1_VAL_MASK		0x7F
+#define FG_DES_CAP0_VAL_MASK		0xFF
+#define FG_DES_CAP_RES_LSB			1456    /* 1.456mAhr */
+
+#define FG_CC_MTR1_VALID			(1 << 7)
+#define FG_CC_MTR1_VAL_MASK			0x7F
+#define FG_CC_MTR0_VAL_MASK			0xFF
+#define FG_DES_CC_RES_LSB			1456    /* 1.456mAhr */
+
+#define FG_OCV_CAP_VALID			(1 << 7)
+#define FG_OCV_CAP_VAL_MASK			0x7F
+#define FG_CC_CAP_VALID				(1 << 7)
+#define FG_CC_CAP_VAL_MASK			0x7F
+
+#define FG_LOW_CAP_THR1_MASK		0xf0    /* 5% tp 20% */
+#define FG_LOW_CAP_THR1_VAL			0xa0    /* 15 perc */
+#define FG_LOW_CAP_THR2_MASK		0x0f    /* 0% to 15% */
+#define FG_LOW_CAP_WARN_THR			14  /* 14 perc */
+#define FG_LOW_CAP_CRIT_THR			4   /* 4 perc */
+#define FG_LOW_CAP_SHDN_THR			0   /* 0 perc */
+
+#define STATUS_MON_DELAY_JIFFIES    (HZ * 60)   /*60 sec */
+#define NR_RETRY_CNT    3
+#define DEV_NAME	"axp288_fuel_gauge"
+
+/* 1.1mV per LSB expressed in uV */
+#define VOLTAGE_FROM_ADC(a)			((a * 11) / 10)
+/* properties converted to tenths of degrees, uV, uA, uW */
+#define PROP_TEMP(a)		((a) * 10)
+#define UNPROP_TEMP(a)		((a) / 10)
+#define PROP_VOLT(a)		((a) * 1000)
+#define PROP_CURR(a)		((a) * 1000)
+
+#define AXP288_FG_INTR_NUM	6
+enum {
+	QWBTU_IRQ = 0,
+	WBTU_IRQ,
+	QWBTO_IRQ,
+	WBTO_IRQ,
+	WL2_IRQ,
+	WL1_IRQ,
+};
+
+struct axp288_fg_info {
+	struct platform_device *pdev;
+	struct axp20x_fg_pdata *pdata;
+	struct regmap *regmap;
+	struct regmap_irq_chip_data *regmap_irqc;
+	int irq[AXP288_FG_INTR_NUM];
+	struct power_supply *bat;
+	struct mutex lock;
+	int status;
+	struct delayed_work status_monitor;
+	struct dentry *debug_file;
+};
+
+static enum power_supply_property fuel_gauge_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_OCV,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TEMP_MAX,
+	POWER_SUPPLY_PROP_TEMP_MIN,
+	POWER_SUPPLY_PROP_TEMP_ALERT_MIN,
+	POWER_SUPPLY_PROP_TEMP_ALERT_MAX,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+};
+
+static int fuel_gauge_reg_readb(struct axp288_fg_info *info, int reg)
+{
+	int ret, i;
+	unsigned int val;
+
+	for (i = 0; i < NR_RETRY_CNT; i++) {
+		ret = regmap_read(info->regmap, reg, &val);
+		if (ret == -EBUSY)
+			continue;
+		else
+			break;
+	}
+
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "axp288 reg read err:%d\n", ret);
+
+	return val;
+}
+
+static int fuel_gauge_reg_writeb(struct axp288_fg_info *info, int reg, u8 val)
+{
+	int ret;
+
+	ret = regmap_write(info->regmap, reg, (unsigned int)val);
+
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "axp288 reg write err:%d\n", ret);
+
+	return ret;
+}
+
+static int pmic_read_adc_val(const char *name, int *raw_val,
+		struct axp288_fg_info *info)
+{
+	int ret, val = 0;
+	struct iio_channel *indio_chan;
+
+	indio_chan = iio_channel_get(NULL, name);
+	if (IS_ERR_OR_NULL(indio_chan)) {
+		ret = PTR_ERR(indio_chan);
+		goto exit;
+	}
+	ret = iio_read_channel_raw(indio_chan, &val);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev,
+			"IIO channel read error: %x, %x\n", ret, val);
+		goto err_exit;
+	}
+
+	dev_dbg(&info->pdev->dev, "adc raw val=%x\n", val);
+	*raw_val = val;
+
+err_exit:
+	iio_channel_release(indio_chan);
+exit:
+	return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int fuel_gauge_debug_show(struct seq_file *s, void *data)
+{
+	struct axp288_fg_info *info = s->private;
+	int raw_val, ret;
+
+	seq_printf(s, " PWR_STATUS[%02x] : %02x\n",
+		AXP20X_PWR_INPUT_STATUS,
+		fuel_gauge_reg_readb(info, AXP20X_PWR_INPUT_STATUS));
+	seq_printf(s, "PWR_OP_MODE[%02x] : %02x\n",
+		AXP20X_PWR_OP_MODE,
+		fuel_gauge_reg_readb(info, AXP20X_PWR_OP_MODE));
+	seq_printf(s, " CHRG_CTRL1[%02x] : %02x\n",
+		AXP20X_CHRG_CTRL1,
+		fuel_gauge_reg_readb(info, AXP20X_CHRG_CTRL1));
+	seq_printf(s, "       VLTF[%02x] : %02x\n",
+		AXP20X_V_LTF_DISCHRG,
+		fuel_gauge_reg_readb(info, AXP20X_V_LTF_DISCHRG));
+	seq_printf(s, "       VHTF[%02x] : %02x\n",
+		AXP20X_V_HTF_DISCHRG,
+		fuel_gauge_reg_readb(info, AXP20X_V_HTF_DISCHRG));
+	seq_printf(s, "    CC_CTRL[%02x] : %02x\n",
+		AXP20X_CC_CTRL,
+		fuel_gauge_reg_readb(info, AXP20X_CC_CTRL));
+	seq_printf(s, "BATTERY CAP[%02x] : %02x\n",
+		AXP20X_FG_RES,
+		fuel_gauge_reg_readb(info, AXP20X_FG_RES));
+	seq_printf(s, "    FG_RDC1[%02x] : %02x\n",
+		AXP288_FG_RDC1_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_RDC1_REG));
+	seq_printf(s, "    FG_RDC0[%02x] : %02x\n",
+		AXP288_FG_RDC0_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_RDC0_REG));
+	seq_printf(s, "    FG_OCVH[%02x] : %02x\n",
+		AXP288_FG_OCVH_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_OCVH_REG));
+	seq_printf(s, "    FG_OCVL[%02x] : %02x\n",
+		AXP288_FG_OCVL_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_OCVL_REG));
+	seq_printf(s, "FG_DES_CAP1[%02x] : %02x\n",
+		AXP288_FG_DES_CAP1_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP1_REG));
+	seq_printf(s, "FG_DES_CAP0[%02x] : %02x\n",
+		AXP288_FG_DES_CAP0_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP0_REG));
+	seq_printf(s, " FG_CC_MTR1[%02x] : %02x\n",
+		AXP288_FG_CC_MTR1_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_CC_MTR1_REG));
+	seq_printf(s, " FG_CC_MTR0[%02x] : %02x\n",
+		AXP288_FG_CC_MTR0_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_CC_MTR0_REG));
+	seq_printf(s, " FG_OCV_CAP[%02x] : %02x\n",
+		AXP288_FG_OCV_CAP_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_OCV_CAP_REG));
+	seq_printf(s, "  FG_CC_CAP[%02x] : %02x\n",
+		AXP288_FG_CC_CAP_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_CC_CAP_REG));
+	seq_printf(s, " FG_LOW_CAP[%02x] : %02x\n",
+		AXP288_FG_LOW_CAP_REG,
+		fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG));
+	seq_printf(s, "TUNING_CTL0[%02x] : %02x\n",
+		AXP288_FG_TUNE0,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE0));
+	seq_printf(s, "TUNING_CTL1[%02x] : %02x\n",
+		AXP288_FG_TUNE1,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE1));
+	seq_printf(s, "TUNING_CTL2[%02x] : %02x\n",
+		AXP288_FG_TUNE2,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE2));
+	seq_printf(s, "TUNING_CTL3[%02x] : %02x\n",
+		AXP288_FG_TUNE3,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE3));
+	seq_printf(s, "TUNING_CTL4[%02x] : %02x\n",
+		AXP288_FG_TUNE4,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE4));
+	seq_printf(s, "TUNING_CTL5[%02x] : %02x\n",
+		AXP288_FG_TUNE5,
+		fuel_gauge_reg_readb(info, AXP288_FG_TUNE5));
+
+	ret = pmic_read_adc_val("axp288-batt-temp", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-batttemp : %d\n", raw_val);
+	ret = pmic_read_adc_val("axp288-pmic-temp", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-pmictemp : %d\n", raw_val);
+	ret = pmic_read_adc_val("axp288-system-temp", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-systtemp : %d\n", raw_val);
+	ret = pmic_read_adc_val("axp288-chrg-curr", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-chrgcurr : %d\n", raw_val);
+	ret = pmic_read_adc_val("axp288-chrg-d-curr", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-dchrgcur : %d\n", raw_val);
+	ret = pmic_read_adc_val("axp288-batt-volt", &raw_val, info);
+	if (ret >= 0)
+		seq_printf(s, "axp288-battvolt : %d\n", raw_val);
+
+	return 0;
+}
+
+static int debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, fuel_gauge_debug_show, inode->i_private);
+}
+
+static const struct file_operations fg_debug_fops = {
+	.open       = debug_open,
+	.read       = seq_read,
+	.llseek     = seq_lseek,
+	.release    = single_release,
+};
+
+static void fuel_gauge_create_debugfs(struct axp288_fg_info *info)
+{
+	info->debug_file = debugfs_create_file("fuelgauge", 0666, NULL,
+		info, &fg_debug_fops);
+}
+
+static void fuel_gauge_remove_debugfs(struct axp288_fg_info *info)
+{
+	debugfs_remove(info->debug_file);
+}
+#else
+static inline void fuel_gauge_create_debugfs(struct axp288_fg_info *info)
+{
+}
+static inline void fuel_gauge_remove_debugfs(struct axp288_fg_info *info)
+{
+}
+#endif
+
+static void fuel_gauge_get_status(struct axp288_fg_info *info)
+{
+	int pwr_stat, ret;
+	int charge, discharge;
+
+	pwr_stat = fuel_gauge_reg_readb(info, AXP20X_PWR_INPUT_STATUS);
+	if (pwr_stat < 0) {
+		dev_err(&info->pdev->dev,
+			"PWR STAT read failed:%d\n", pwr_stat);
+		return;
+	}
+	ret = pmic_read_adc_val("axp288-chrg-curr", &charge, info);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev,
+			"ADC charge current read failed:%d\n", ret);
+		return;
+	}
+	ret = pmic_read_adc_val("axp288-chrg-d-curr", &discharge, info);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev,
+			"ADC discharge current read failed:%d\n", ret);
+		return;
+	}
+
+	if (charge > 0)
+		info->status = POWER_SUPPLY_STATUS_CHARGING;
+	else if (discharge > 0)
+		info->status = POWER_SUPPLY_STATUS_DISCHARGING;
+	else {
+		if (pwr_stat & CHRG_STAT_BAT_PRESENT)
+			info->status = POWER_SUPPLY_STATUS_FULL;
+		else
+			info->status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	}
+}
+
+static int fuel_gauge_get_vbatt(struct axp288_fg_info *info, int *vbatt)
+{
+	int ret = 0, raw_val;
+
+	ret = pmic_read_adc_val("axp288-batt-volt", &raw_val, info);
+	if (ret < 0)
+		goto vbatt_read_fail;
+
+	*vbatt = VOLTAGE_FROM_ADC(raw_val);
+vbatt_read_fail:
+	return ret;
+}
+
+static int fuel_gauge_get_current(struct axp288_fg_info *info, int *cur)
+{
+	int ret, value = 0;
+	int charge, discharge;
+
+	ret = pmic_read_adc_val("axp288-chrg-curr", &charge, info);
+	if (ret < 0)
+		goto current_read_fail;
+	ret = pmic_read_adc_val("axp288-chrg-d-curr", &discharge, info);
+	if (ret < 0)
+		goto current_read_fail;
+
+	if (charge > 0)
+		value = charge;
+	else if (discharge > 0)
+		value = -1 * discharge;
+
+	*cur = value;
+current_read_fail:
+	return ret;
+}
+
+static int temp_to_adc(struct axp288_fg_info *info, int tval)
+{
+	int rntc = 0, i, ret, adc_val;
+	int rmin, rmax, tmin, tmax;
+	int tcsz = info->pdata->tcsz;
+
+	/* get the Rntc resitance value for this temp */
+	if (tval > info->pdata->thermistor_curve[0][1]) {
+		rntc = info->pdata->thermistor_curve[0][0];
+	} else if (tval <= info->pdata->thermistor_curve[tcsz-1][1]) {
+		rntc = info->pdata->thermistor_curve[tcsz-1][0];
+	} else {
+		for (i = 1; i < tcsz; i++) {
+			if (tval > info->pdata->thermistor_curve[i][1]) {
+				rmin = info->pdata->thermistor_curve[i-1][0];
+				rmax = info->pdata->thermistor_curve[i][0];
+				tmin = info->pdata->thermistor_curve[i-1][1];
+				tmax = info->pdata->thermistor_curve[i][1];
+				rntc = rmin + ((rmax - rmin) *
+					(tval - tmin) / (tmax - tmin));
+				break;
+			}
+		}
+	}
+
+	/* we need the current to calculate the proper adc voltage */
+	ret = fuel_gauge_reg_readb(info, AXP20X_ADC_RATE);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev, "%s:read err:%d\n", __func__, ret);
+		ret = 0x30;
+	}
+
+	/*
+	 * temperature is proportional to NTS thermistor resistance
+	 * ADC_RATE[5-4] determines current, 00=20uA,01=40uA,10=60uA,11=80uA
+	 * [12-bit ADC VAL] = R_NTC(Ω) * current / 800
+	 */
+	adc_val = rntc * (20 + (20 * ((ret >> 4) & 0x3))) / 800;
+
+	return adc_val;
+}
+
+static int adc_to_temp(struct axp288_fg_info *info, int adc_val)
+{
+	int ret, r, i, tval = 0;
+	int rmin, rmax, tmin, tmax;
+	int tcsz = info->pdata->tcsz;
+
+	ret = fuel_gauge_reg_readb(info, AXP20X_ADC_RATE);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev, "%s:read err:%d\n", __func__, ret);
+		ret = 0x30;
+	}
+
+	/*
+	 * temperature is proportional to NTS thermistor resistance
+	 * ADC_RATE[5-4] determines current, 00=20uA,01=40uA,10=60uA,11=80uA
+	 * R_NTC(Ω) = [12-bit ADC VAL] * 800 / current
+	 */
+	r = adc_val * 800 / (20 + (20 * ((ret >> 4) & 0x3)));
+
+	if (r < info->pdata->thermistor_curve[0][0]) {
+		tval = info->pdata->thermistor_curve[0][1];
+	} else if (r >= info->pdata->thermistor_curve[tcsz-1][0]) {
+		tval = info->pdata->thermistor_curve[tcsz-1][1];
+	} else {
+		for (i = 1; i < tcsz; i++) {
+			if (r < info->pdata->thermistor_curve[i][0]) {
+				rmin = info->pdata->thermistor_curve[i-1][0];
+				rmax = info->pdata->thermistor_curve[i][0];
+				tmin = info->pdata->thermistor_curve[i-1][1];
+				tmax = info->pdata->thermistor_curve[i][1];
+				tval = tmin + ((tmax - tmin) *
+					(r - rmin) / (rmax - rmin));
+				break;
+			}
+		}
+	}
+
+	return tval;
+}
+
+static int fuel_gauge_get_btemp(struct axp288_fg_info *info, int *btemp)
+{
+	int ret, raw_val = 0;
+
+	ret = pmic_read_adc_val("axp288-batt-temp", &raw_val, info);
+	if (ret < 0)
+		goto temp_read_fail;
+
+	*btemp = adc_to_temp(info, raw_val);
+
+temp_read_fail:
+	return ret;
+}
+
+static int fuel_gauge_get_vocv(struct axp288_fg_info *info, int *vocv)
+{
+	int ret, value;
+
+	/* 12-bit data value, upper 8 in OCVH, lower 4 in OCVL */
+	ret = fuel_gauge_reg_readb(info, AXP288_FG_OCVH_REG);
+	if (ret < 0)
+		goto vocv_read_fail;
+	value = ret << 4;
+
+	ret = fuel_gauge_reg_readb(info, AXP288_FG_OCVL_REG);
+	if (ret < 0)
+		goto vocv_read_fail;
+	value |= (ret & 0xf);
+
+	*vocv = VOLTAGE_FROM_ADC(value);
+vocv_read_fail:
+	return ret;
+}
+
+static int fuel_gauge_battery_health(struct axp288_fg_info *info)
+{
+	int temp, vocv;
+	int ret, health = POWER_SUPPLY_HEALTH_UNKNOWN;
+
+	ret = fuel_gauge_get_btemp(info, &temp);
+	if (ret < 0)
+		goto health_read_fail;
+
+	ret = fuel_gauge_get_vocv(info, &vocv);
+	if (ret < 0)
+		goto health_read_fail;
+
+	if (vocv > info->pdata->max_volt)
+		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+	else if (temp > info->pdata->max_temp)
+		health = POWER_SUPPLY_HEALTH_OVERHEAT;
+	else if (temp < info->pdata->min_temp)
+		health = POWER_SUPPLY_HEALTH_COLD;
+	else if (vocv < info->pdata->min_volt)
+		health = POWER_SUPPLY_HEALTH_DEAD;
+	else
+		health = POWER_SUPPLY_HEALTH_GOOD;
+
+health_read_fail:
+	return health;
+}
+
+static int fuel_gauge_set_high_btemp_alert(struct axp288_fg_info *info)
+{
+	int ret, adc_val;
+
+	/* program temperature threshold as 1/16 ADC value */
+	adc_val = temp_to_adc(info, info->pdata->max_temp);
+	ret = fuel_gauge_reg_writeb(info, AXP20X_V_HTF_DISCHRG, adc_val >> 4);
+
+	return ret;
+}
+
+static int fuel_gauge_set_low_btemp_alert(struct axp288_fg_info *info)
+{
+	int ret, adc_val;
+
+	/* program temperature threshold as 1/16 ADC value */
+	adc_val = temp_to_adc(info, info->pdata->min_temp);
+	ret = fuel_gauge_reg_writeb(info, AXP20X_V_LTF_DISCHRG, adc_val >> 4);
+
+	return ret;
+}
+
+static int fuel_gauge_get_property(struct power_supply *ps,
+		enum power_supply_property prop,
+		union power_supply_propval *val)
+{
+	struct axp288_fg_info *info = power_supply_get_drvdata(ps);
+	int ret = 0, value;
+
+	mutex_lock(&info->lock);
+	switch (prop) {
+	case POWER_SUPPLY_PROP_STATUS:
+		fuel_gauge_get_status(info);
+		val->intval = info->status;
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		val->intval = fuel_gauge_battery_health(info);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = fuel_gauge_get_vbatt(info, &value);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		val->intval = PROP_VOLT(value);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_OCV:
+		ret = fuel_gauge_get_vocv(info, &value);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		val->intval = PROP_VOLT(value);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = fuel_gauge_get_current(info, &value);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		val->intval = PROP_CURR(value);
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		ret = fuel_gauge_reg_readb(info, AXP20X_PWR_OP_MODE);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+
+		if (ret & CHRG_STAT_BAT_PRESENT)
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		ret = fuel_gauge_reg_readb(info, AXP20X_FG_RES);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+
+		if (!(ret & FG_REP_CAP_VALID))
+			dev_err(&info->pdev->dev,
+				"capacity measurement not valid\n");
+		val->intval = (ret & FG_REP_CAP_VAL_MASK);
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		val->intval = (ret & 0x0f);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		ret = fuel_gauge_get_btemp(info, &value);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		val->intval = PROP_TEMP(value);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_MAX:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		val->intval = PROP_TEMP(info->pdata->max_temp);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_MIN:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
+		val->intval = PROP_TEMP(info->pdata->min_temp);
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_CC_MTR1_REG);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+
+		value = (ret & FG_CC_MTR1_VAL_MASK) << 8;
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_CC_MTR0_REG);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		value |= (ret & FG_CC_MTR0_VAL_MASK);
+		val->intval = value * FG_DES_CAP_RES_LSB;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP1_REG);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+
+		value = (ret & FG_DES_CAP1_VAL_MASK) << 8;
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP0_REG);
+		if (ret < 0)
+			goto fuel_gauge_read_err;
+		value |= (ret & FG_DES_CAP0_VAL_MASK);
+		val->intval = value * FG_DES_CAP_RES_LSB;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		val->intval = PROP_CURR(info->pdata->design_cap);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = PROP_VOLT(info->pdata->max_volt);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = PROP_VOLT(info->pdata->min_volt);
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = info->pdata->battid;
+		break;
+	default:
+		mutex_unlock(&info->lock);
+		return -EINVAL;
+	}
+
+	mutex_unlock(&info->lock);
+	return 0;
+
+fuel_gauge_read_err:
+	mutex_unlock(&info->lock);
+	return ret;
+}
+
+static int fuel_gauge_set_property(struct power_supply *ps,
+		enum power_supply_property prop,
+		const union power_supply_propval *val)
+{
+	struct axp288_fg_info *info = power_supply_get_drvdata(ps);
+	int ret = 0;
+
+	mutex_lock(&info->lock);
+	switch (prop) {
+	case POWER_SUPPLY_PROP_STATUS:
+		info->status = val->intval;
+		break;
+	case POWER_SUPPLY_PROP_TEMP_MIN:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
+		if ((val->intval < PD_DEF_MIN_TEMP) ||
+			(val->intval > PD_DEF_MAX_TEMP)) {
+			ret = -EINVAL;
+			break;
+		}
+		info->pdata->min_temp = UNPROP_TEMP(val->intval);
+		ret = fuel_gauge_set_low_btemp_alert(info);
+		if (ret < 0)
+			dev_err(&info->pdev->dev,
+				"temp alert min set fail:%d\n", ret);
+		break;
+	case POWER_SUPPLY_PROP_TEMP_MAX:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		if ((val->intval < PD_DEF_MIN_TEMP) ||
+			(val->intval > PD_DEF_MAX_TEMP)) {
+			ret = -EINVAL;
+			break;
+		}
+		info->pdata->max_temp = UNPROP_TEMP(val->intval);
+		ret = fuel_gauge_set_high_btemp_alert(info);
+		if (ret < 0)
+			dev_err(&info->pdev->dev,
+				"temp alert max set fail:%d\n", ret);
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
+		if ((val->intval < 0) || (val->intval > 15)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = fuel_gauge_reg_readb(info, AXP288_FG_LOW_CAP_REG);
+		if (ret < 0)
+			break;
+		ret &= 0xf0;
+		ret |= (val->intval & 0xf);
+		ret = fuel_gauge_reg_writeb(info, AXP288_FG_LOW_CAP_REG, ret);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&info->lock);
+	return ret;
+}
+
+static int fuel_gauge_property_is_writeable(struct power_supply *psy,
+	enum power_supply_property psp)
+{
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+	case POWER_SUPPLY_PROP_TEMP_MIN:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
+	case POWER_SUPPLY_PROP_TEMP_MAX:
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+	case POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN:
+		ret = 1;
+		break;
+	default:
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static void fuel_gauge_status_monitor(struct work_struct *work)
+{
+	struct axp288_fg_info *info = container_of(work,
+		struct axp288_fg_info, status_monitor.work);
+
+	fuel_gauge_get_status(info);
+	power_supply_changed(info->bat);
+	schedule_delayed_work(&info->status_monitor, STATUS_MON_DELAY_JIFFIES);
+}
+
+static irqreturn_t fuel_gauge_thread_handler(int irq, void *dev)
+{
+	struct axp288_fg_info *info = dev;
+	int i;
+
+	for (i = 0; i < AXP288_FG_INTR_NUM; i++) {
+		if (info->irq[i] == irq)
+			break;
+	}
+
+	if (i >= AXP288_FG_INTR_NUM) {
+		dev_warn(&info->pdev->dev, "spurious interrupt!!\n");
+		return IRQ_NONE;
+	}
+
+	switch (i) {
+	case QWBTU_IRQ:
+		dev_info(&info->pdev->dev,
+			"Quit Battery under temperature in work mode IRQ (QWBTU)\n");
+		break;
+	case WBTU_IRQ:
+		dev_info(&info->pdev->dev,
+			"Battery under temperature in work mode IRQ (WBTU)\n");
+		break;
+	case QWBTO_IRQ:
+		dev_info(&info->pdev->dev,
+			"Quit Battery over temperature in work mode IRQ (QWBTO)\n");
+		break;
+	case WBTO_IRQ:
+		dev_info(&info->pdev->dev,
+			"Battery over temperature in work mode IRQ (WBTO)\n");
+		break;
+	case WL2_IRQ:
+		dev_info(&info->pdev->dev, "Low Batt Warning(2) INTR\n");
+		break;
+	case WL1_IRQ:
+		dev_info(&info->pdev->dev, "Low Batt Warning(1) INTR\n");
+		break;
+	default:
+		dev_warn(&info->pdev->dev, "Spurious Interrupt!!!\n");
+	}
+
+	power_supply_changed(info->bat);
+	return IRQ_HANDLED;
+}
+
+static void fuel_gauge_external_power_changed(struct power_supply *psy)
+{
+	struct axp288_fg_info *info = power_supply_get_drvdata(psy);
+
+	power_supply_changed(info->bat);
+}
+
+static const struct power_supply_desc fuel_gauge_desc = {
+	.name			= DEV_NAME,
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= fuel_gauge_props,
+	.num_properties		= ARRAY_SIZE(fuel_gauge_props),
+	.get_property		= fuel_gauge_get_property,
+	.set_property		= fuel_gauge_set_property,
+	.property_is_writeable	= fuel_gauge_property_is_writeable,
+	.external_power_changed	= fuel_gauge_external_power_changed,
+};
+
+static int fuel_gauge_set_lowbatt_thresholds(struct axp288_fg_info *info)
+{
+	int ret;
+	u8 reg_val;
+
+	ret = fuel_gauge_reg_readb(info, AXP20X_FG_RES);
+	if (ret < 0) {
+		dev_err(&info->pdev->dev, "%s:read err:%d\n", __func__, ret);
+		return ret;
+	}
+	ret = (ret & FG_REP_CAP_VAL_MASK);
+
+	if (ret > FG_LOW_CAP_WARN_THR)
+		reg_val = FG_LOW_CAP_WARN_THR;
+	else if (ret > FG_LOW_CAP_CRIT_THR)
+		reg_val = FG_LOW_CAP_CRIT_THR;
+	else
+		reg_val = FG_LOW_CAP_SHDN_THR;
+
+	reg_val |= FG_LOW_CAP_THR1_VAL;
+	ret = fuel_gauge_reg_writeb(info, AXP288_FG_LOW_CAP_REG, reg_val);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "%s:write err:%d\n", __func__, ret);
+
+	return ret;
+}
+
+static int fuel_gauge_program_vbatt_full(struct axp288_fg_info *info)
+{
+	int ret;
+	u8 val;
+
+	ret = fuel_gauge_reg_readb(info, AXP20X_CHRG_CTRL1);
+	if (ret < 0)
+		goto fg_prog_ocv_fail;
+	else
+		val = (ret & ~CHRG_CCCV_CV_MASK);
+
+	switch (info->pdata->max_volt) {
+	case CV_4100:
+		val |= (CHRG_CCCV_CV_4100MV << CHRG_CCCV_CV_BIT_POS);
+		break;
+	case CV_4150:
+		val |= (CHRG_CCCV_CV_4150MV << CHRG_CCCV_CV_BIT_POS);
+		break;
+	case CV_4200:
+		val |= (CHRG_CCCV_CV_4200MV << CHRG_CCCV_CV_BIT_POS);
+		break;
+	case CV_4350:
+		val |= (CHRG_CCCV_CV_4350MV << CHRG_CCCV_CV_BIT_POS);
+		break;
+	default:
+		val |= (CHRG_CCCV_CV_4200MV << CHRG_CCCV_CV_BIT_POS);
+		break;
+	}
+
+	ret = fuel_gauge_reg_writeb(info, AXP20X_CHRG_CTRL1, val);
+fg_prog_ocv_fail:
+	return ret;
+}
+
+static int fuel_gauge_program_design_cap(struct axp288_fg_info *info)
+{
+	int ret;
+
+	ret = fuel_gauge_reg_writeb(info,
+		AXP288_FG_DES_CAP1_REG, info->pdata->cap1);
+	if (ret < 0)
+		goto fg_prog_descap_fail;
+
+	ret = fuel_gauge_reg_writeb(info,
+		AXP288_FG_DES_CAP0_REG, info->pdata->cap0);
+
+fg_prog_descap_fail:
+	return ret;
+}
+
+static int fuel_gauge_program_ocv_curve(struct axp288_fg_info *info)
+{
+	int ret = 0, i;
+
+	for (i = 0; i < OCV_CURVE_SIZE; i++) {
+		ret = fuel_gauge_reg_writeb(info,
+			AXP288_FG_OCV_CURVE_REG + i, info->pdata->ocv_curve[i]);
+		if (ret < 0)
+			goto fg_prog_ocv_fail;
+	}
+
+fg_prog_ocv_fail:
+	return ret;
+}
+
+static int fuel_gauge_program_rdc_vals(struct axp288_fg_info *info)
+{
+	int ret;
+
+	ret = fuel_gauge_reg_writeb(info,
+		AXP288_FG_RDC1_REG, info->pdata->rdc1);
+	if (ret < 0)
+		goto fg_prog_ocv_fail;
+
+	ret = fuel_gauge_reg_writeb(info,
+		AXP288_FG_RDC0_REG, info->pdata->rdc0);
+
+fg_prog_ocv_fail:
+	return ret;
+}
+
+static void fuel_gauge_init_config_regs(struct axp288_fg_info *info)
+{
+	int ret;
+
+	/*
+	 * check if the config data is already
+	 * programmed and if so just return.
+	 */
+
+	ret = fuel_gauge_reg_readb(info, AXP288_FG_DES_CAP1_REG);
+	if (ret < 0) {
+		dev_warn(&info->pdev->dev, "CAP1 reg read err!!\n");
+	} else if (!(ret & FG_DES_CAP1_VALID)) {
+		dev_info(&info->pdev->dev, "FG data needs to be initialized\n");
+	} else {
+		dev_info(&info->pdev->dev, "FG data is already initialized\n");
+		return;
+	}
+
+	ret = fuel_gauge_program_vbatt_full(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "set vbatt full fail:%d\n", ret);
+
+	ret = fuel_gauge_program_design_cap(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "set design cap fail:%d\n", ret);
+
+	ret = fuel_gauge_program_rdc_vals(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "set rdc fail:%d\n", ret);
+
+	ret = fuel_gauge_program_ocv_curve(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "set ocv curve fail:%d\n", ret);
+
+	ret = fuel_gauge_set_lowbatt_thresholds(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "lowbatt thr set fail:%d\n", ret);
+
+	ret = fuel_gauge_reg_writeb(info, AXP20X_CC_CTRL, 0xef);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "gauge cntl set fail:%d\n", ret);
+}
+
+static void fuel_gauge_init_irq(struct axp288_fg_info *info)
+{
+	int ret, i, pirq;
+
+	for (i = 0; i < AXP288_FG_INTR_NUM; i++) {
+		pirq = platform_get_irq(info->pdev, i);
+		info->irq[i] = regmap_irq_get_virq(info->regmap_irqc, pirq);
+		if (info->irq[i] < 0) {
+			dev_warn(&info->pdev->dev,
+				"regmap_irq get virq failed for IRQ %d: %d\n",
+				pirq, info->irq[i]);
+			info->irq[i] = -1;
+			goto intr_failed;
+		}
+		ret = request_threaded_irq(info->irq[i],
+				NULL, fuel_gauge_thread_handler,
+				IRQF_ONESHOT, DEV_NAME, info);
+		if (ret) {
+			dev_warn(&info->pdev->dev,
+				"request irq failed for IRQ %d: %d\n",
+				pirq, info->irq[i]);
+			info->irq[i] = -1;
+			goto intr_failed;
+		} else {
+			dev_info(&info->pdev->dev, "HW IRQ %d -> VIRQ %d\n",
+				pirq, info->irq[i]);
+		}
+	}
+	return;
+
+intr_failed:
+	for (; i > 0; i--) {
+		free_irq(info->irq[i - 1], info);
+		info->irq[i - 1] = -1;
+	}
+}
+
+static void fuel_gauge_init_hw_regs(struct axp288_fg_info *info)
+{
+	int ret;
+	unsigned int val;
+
+	ret = fuel_gauge_set_high_btemp_alert(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "high batt temp set fail:%d\n", ret);
+
+	ret = fuel_gauge_set_low_btemp_alert(info);
+	if (ret < 0)
+		dev_err(&info->pdev->dev, "low batt temp set fail:%d\n", ret);
+
+	/* enable interrupts */
+	val = fuel_gauge_reg_readb(info, AXP20X_IRQ3_EN);
+	val |= TEMP_IRQ_CFG_MASK;
+	fuel_gauge_reg_writeb(info, AXP20X_IRQ3_EN, val);
+
+	val = fuel_gauge_reg_readb(info, AXP20X_IRQ4_EN);
+	val |= FG_IRQ_CFG_LOWBATT_MASK;
+	val = fuel_gauge_reg_writeb(info, AXP20X_IRQ4_EN, val);
+}
+
+static int axp288_fuel_gauge_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct axp288_fg_info *info;
+	struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent);
+	struct power_supply_config psy_cfg = {};
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->pdev = pdev;
+	info->regmap = axp20x->regmap;
+	info->regmap_irqc = axp20x->regmap_irqc;
+	info->status = POWER_SUPPLY_STATUS_UNKNOWN;
+	info->pdata = pdev->dev.platform_data;
+	if (!info->pdata)
+		return -ENODEV;
+
+	platform_set_drvdata(pdev, info);
+
+	mutex_init(&info->lock);
+	INIT_DELAYED_WORK(&info->status_monitor, fuel_gauge_status_monitor);
+
+	psy_cfg.drv_data = info;
+	info->bat = power_supply_register(&pdev->dev, &fuel_gauge_desc, &psy_cfg);
+	if (IS_ERR(info->bat)) {
+		ret = PTR_ERR(info->bat);
+		dev_err(&pdev->dev, "failed to register battery: %d\n", ret);
+		return ret;
+	}
+
+	fuel_gauge_create_debugfs(info);
+	fuel_gauge_init_config_regs(info);
+	fuel_gauge_init_irq(info);
+	fuel_gauge_init_hw_regs(info);
+	schedule_delayed_work(&info->status_monitor, STATUS_MON_DELAY_JIFFIES);
+
+	return ret;
+}
+
+static struct platform_device_id axp288_fg_id_table[] = {
+	{ .name = DEV_NAME },
+	{},
+};
+
+static int axp288_fuel_gauge_remove(struct platform_device *pdev)
+{
+	struct axp288_fg_info *info = platform_get_drvdata(pdev);
+	int i;
+
+	cancel_delayed_work_sync(&info->status_monitor);
+	power_supply_unregister(info->bat);
+	fuel_gauge_remove_debugfs(info);
+
+	for (i = 0; i < AXP288_FG_INTR_NUM; i++)
+		if (info->irq[i] >= 0)
+			free_irq(info->irq[i], info);
+
+	return 0;
+}
+
+static struct platform_driver axp288_fuel_gauge_driver = {
+	.probe = axp288_fuel_gauge_probe,
+	.remove = axp288_fuel_gauge_remove,
+	.id_table = axp288_fg_id_table,
+	.driver = {
+		.name = DEV_NAME,
+	},
+};
+
+module_platform_driver(axp288_fuel_gauge_driver);
+
+MODULE_AUTHOR("Todd Brandt <todd.e.brandt@linux.intel.com>");
+MODULE_DESCRIPTION("Xpower AXP288 Fuel Gauge Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index 1f49986..6c534dc 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -13,12 +13,6 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/*
  * Datasheets:
  * http://www.ti.com/product/bq24150
  * http://www.ti.com/product/bq24150a
@@ -26,6 +20,8 @@
  * http://www.ti.com/product/bq24153
  * http://www.ti.com/product/bq24153a
  * http://www.ti.com/product/bq24155
+ * http://www.ti.com/product/bq24157s
+ * http://www.ti.com/product/bq24158
  */
 
 #include <linux/kernel.h>
@@ -147,6 +143,7 @@
 	BQ24155,
 	BQ24156,
 	BQ24156A,
+	BQ24157S,
 	BQ24158,
 };
 
@@ -162,18 +159,20 @@
 	"bq24155",
 	"bq24156",
 	"bq24156a",
+	"bq24157s",
 	"bq24158",
 };
 
 struct bq2415x_device {
 	struct device *dev;
 	struct bq2415x_platform_data init_data;
-	struct power_supply charger;
+	struct power_supply *charger;
+	struct power_supply_desc charger_desc;
 	struct delayed_work work;
 	struct power_supply *notify_psy;
 	struct notifier_block nb;
 	enum bq2415x_mode reported_mode;/* mode reported by hook function */
-	enum bq2415x_mode mode;		/* current configured mode */
+	enum bq2415x_mode mode;		/* currently configured mode */
 	enum bq2415x_chip chip;
 	const char *timer_error;
 	char *model;
@@ -352,8 +351,7 @@
 			BQ2415X_BIT_CE);
 		if (ret < 0)
 			return ret;
-		else
-			return ret > 0 ? 0 : 1;
+		return ret > 0 ? 0 : 1;
 	case BQ2415X_CHARGER_ENABLE:
 		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
 				0, BQ2415X_BIT_CE);
@@ -426,20 +424,17 @@
 		case 0:
 			if (bq->chip == BQ24151A)
 				return bq->chip;
-			else
-				return BQ24151;
+			return BQ24151;
 		case 1:
 			if (bq->chip == BQ24150A ||
 				bq->chip == BQ24152 ||
 				bq->chip == BQ24155)
 				return bq->chip;
-			else
-				return BQ24150;
+			return BQ24150;
 		case 2:
 			if (bq->chip == BQ24153A)
 				return bq->chip;
-			else
-				return BQ24153;
+			return BQ24153;
 		default:
 			return BQUNKNOWN;
 		}
@@ -450,9 +445,10 @@
 		case 0:
 			if (bq->chip == BQ24156A)
 				return bq->chip;
-			else
-				return BQ24156;
+			return BQ24156;
 		case 2:
+			if (bq->chip == BQ24157S)
+				return bq->chip;
 			return BQ24158;
 		default:
 			return BQUNKNOWN;
@@ -480,24 +476,22 @@
 	case BQ24152:
 		if (ret >= 0 && ret <= 3)
 			return ret;
-		else
-			return -1;
+		return -1;
 	case BQ24153:
 	case BQ24153A:
 	case BQ24156:
 	case BQ24156A:
+	case BQ24157S:
 	case BQ24158:
 		if (ret == 3)
 			return 0;
 		else if (ret == 1)
 			return 1;
-		else
-			return -1;
+		return -1;
 	case BQ24155:
 		if (ret == 3)
 			return 3;
-		else
-			return -1;
+		return -1;
 	case BQUNKNOWN:
 		return -1;
 	}
@@ -791,7 +785,7 @@
 	bq2415x_set_default_value(bq, battery_regulation_voltage);
 
 	bq->mode = mode;
-	sysfs_notify(&bq->charger.dev->kobj, NULL, "mode");
+	sysfs_notify(&bq->charger->dev.kobj, NULL, "mode");
 
 	return 0;
 
@@ -816,7 +810,8 @@
 
 	dev_dbg(bq->dev, "notifier call was called\n");
 
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_CURRENT_MAX, &prop);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_CURRENT_MAX,
+			&prop);
 	if (ret != 0)
 		return NOTIFY_OK;
 
@@ -874,7 +869,7 @@
 static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
 {
 	bq->timer_error = msg;
-	sysfs_notify(&bq->charger.dev->kobj, NULL, "timer");
+	sysfs_notify(&bq->charger->dev.kobj, NULL, "timer");
 	dev_err(bq->dev, "%s\n", msg);
 	if (bq->automode > 0)
 		bq->automode = 0;
@@ -892,7 +887,7 @@
 	int boost;
 
 	if (bq->automode > 0 && (bq->reported_mode != bq->mode)) {
-		sysfs_notify(&bq->charger.dev->kobj, NULL, "reported_mode");
+		sysfs_notify(&bq->charger->dev.kobj, NULL, "reported_mode");
 		bq2415x_set_mode(bq, bq->reported_mode);
 	}
 
@@ -998,8 +993,7 @@
 					     enum power_supply_property psp,
 					     union power_supply_propval *val)
 {
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	int ret;
 
 	switch (psp) {
@@ -1030,12 +1024,14 @@
 	int ret;
 	int chip;
 	char revstr[8];
+	struct power_supply_config psy_cfg = { .drv_data = bq, };
 
-	bq->charger.name = bq->name;
-	bq->charger.type = POWER_SUPPLY_TYPE_USB;
-	bq->charger.properties = bq2415x_power_supply_props;
-	bq->charger.num_properties = ARRAY_SIZE(bq2415x_power_supply_props);
-	bq->charger.get_property = bq2415x_power_supply_get_property;
+	bq->charger_desc.name = bq->name;
+	bq->charger_desc.type = POWER_SUPPLY_TYPE_USB;
+	bq->charger_desc.properties = bq2415x_power_supply_props;
+	bq->charger_desc.num_properties =
+			ARRAY_SIZE(bq2415x_power_supply_props);
+	bq->charger_desc.get_property = bq2415x_power_supply_get_property;
 
 	ret = bq2415x_detect_chip(bq);
 	if (ret < 0)
@@ -1058,10 +1054,11 @@
 		return -ENOMEM;
 	}
 
-	ret = power_supply_register(bq->dev, &bq->charger);
-	if (ret) {
+	bq->charger = power_supply_register(bq->dev, &bq->charger_desc,
+					    &psy_cfg);
+	if (IS_ERR(bq->charger)) {
 		kfree(bq->model);
-		return ret;
+		return PTR_ERR(bq->charger);
 	}
 
 	return 0;
@@ -1073,7 +1070,7 @@
 	if (bq->automode > 0)
 		bq->automode = 0;
 	cancel_delayed_work_sync(&bq->work);
-	power_supply_unregister(&bq->charger);
+	power_supply_unregister(bq->charger);
 	kfree(bq->model);
 }
 
@@ -1085,8 +1082,7 @@
 					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	enum bq2415x_command command;
 	int ret;
 
@@ -1119,8 +1115,7 @@
 				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	if (strncmp(buf, "auto", 4) == 0)
@@ -1141,8 +1136,7 @@
 					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 
 	if (bq->timer_error)
 		return sprintf(buf, "%s\n", bq->timer_error);
@@ -1166,8 +1160,7 @@
 				      size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	enum bq2415x_mode mode;
 	int ret = 0;
 
@@ -1219,8 +1212,7 @@
 				       char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	ssize_t ret = 0;
 
 	if (bq->automode > 0)
@@ -1257,8 +1249,7 @@
 						char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 
 	if (bq->automode < 0)
 		return -EINVAL;
@@ -1286,8 +1277,7 @@
 					   size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	ssize_t ret = 0;
 	unsigned int reg;
 	unsigned int val;
@@ -1322,8 +1312,7 @@
 					    char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	ssize_t ret = 0;
 
 	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret);
@@ -1341,8 +1330,7 @@
 				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	long val;
 	int ret;
 
@@ -1373,8 +1361,7 @@
 					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	int ret;
 
 	if (strcmp(attr->attr.name, "current_limit") == 0)
@@ -1402,8 +1389,7 @@
 					size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	enum bq2415x_command command;
 	long val;
 	int ret;
@@ -1438,8 +1424,7 @@
 					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						 charger);
+	struct bq2415x_device *bq = power_supply_get_drvdata(psy);
 	enum bq2415x_command command;
 	int ret;
 
@@ -1530,13 +1515,13 @@
 
 static int bq2415x_sysfs_init(struct bq2415x_device *bq)
 {
-	return sysfs_create_group(&bq->charger.dev->kobj,
+	return sysfs_create_group(&bq->charger->dev.kobj,
 			&bq2415x_sysfs_attr_group);
 }
 
 static void bq2415x_sysfs_exit(struct bq2415x_device *bq)
 {
-	sysfs_remove_group(&bq->charger.dev->kobj, &bq2415x_sysfs_attr_group);
+	sysfs_remove_group(&bq->charger->dev.kobj, &bq2415x_sysfs_attr_group);
 }
 
 /* main bq2415x probe function */
@@ -1609,27 +1594,27 @@
 		ret = of_property_read_u32(np, "ti,current-limit",
 				&bq->init_data.current_limit);
 		if (ret)
-			goto error_2;
+			goto error_3;
 		ret = of_property_read_u32(np, "ti,weak-battery-voltage",
 				&bq->init_data.weak_battery_voltage);
 		if (ret)
-			goto error_2;
+			goto error_3;
 		ret = of_property_read_u32(np, "ti,battery-regulation-voltage",
 				&bq->init_data.battery_regulation_voltage);
 		if (ret)
-			goto error_2;
+			goto error_3;
 		ret = of_property_read_u32(np, "ti,charge-current",
 				&bq->init_data.charge_current);
 		if (ret)
-			goto error_2;
+			goto error_3;
 		ret = of_property_read_u32(np, "ti,termination-current",
 				&bq->init_data.termination_current);
 		if (ret)
-			goto error_2;
+			goto error_3;
 		ret = of_property_read_u32(np, "ti,resistor-sense",
 				&bq->init_data.resistor_sense);
 		if (ret)
-			goto error_2;
+			goto error_3;
 	} else {
 		memcpy(&bq->init_data, pdata, sizeof(bq->init_data));
 	}
@@ -1639,19 +1624,19 @@
 	ret = bq2415x_power_supply_init(bq);
 	if (ret) {
 		dev_err(bq->dev, "failed to register power supply: %d\n", ret);
-		goto error_2;
+		goto error_3;
 	}
 
 	ret = bq2415x_sysfs_init(bq);
 	if (ret) {
 		dev_err(bq->dev, "failed to create sysfs entries: %d\n", ret);
-		goto error_3;
+		goto error_4;
 	}
 
 	ret = bq2415x_set_defaults(bq);
 	if (ret) {
 		dev_err(bq->dev, "failed to set default values: %d\n", ret);
-		goto error_4;
+		goto error_5;
 	}
 
 	if (bq->notify_psy) {
@@ -1659,7 +1644,7 @@
 		ret = power_supply_reg_notifier(&bq->nb);
 		if (ret) {
 			dev_err(bq->dev, "failed to reg notifier: %d\n", ret);
-			goto error_5;
+			goto error_6;
 		}
 
 		/* Query for initial reported_mode and set it */
@@ -1679,11 +1664,14 @@
 	dev_info(bq->dev, "driver registered\n");
 	return 0;
 
+error_6:
 error_5:
-error_4:
 	bq2415x_sysfs_exit(bq);
-error_3:
+error_4:
 	bq2415x_power_supply_exit(bq);
+error_3:
+	if (bq->notify_psy)
+		power_supply_put(bq->notify_psy);
 error_2:
 	kfree(name);
 error_1:
@@ -1700,8 +1688,10 @@
 {
 	struct bq2415x_device *bq = i2c_get_clientdata(client);
 
-	if (bq->notify_psy)
+	if (bq->notify_psy) {
 		power_supply_unreg_notifier(&bq->nb);
+		power_supply_put(bq->notify_psy);
+	}
 
 	bq2415x_sysfs_exit(bq);
 	bq2415x_power_supply_exit(bq);
@@ -1731,6 +1721,7 @@
 	{ "bq24155", BQ24155 },
 	{ "bq24156", BQ24156 },
 	{ "bq24156a", BQ24156A },
+	{ "bq24157s", BQ24157S },
 	{ "bq24158", BQ24158 },
 	{},
 };
diff --git a/drivers/power/bq24190_charger.c b/drivers/power/bq24190_charger.c
index d0e8236..407c4af 100644
--- a/drivers/power/bq24190_charger.c
+++ b/drivers/power/bq24190_charger.c
@@ -152,8 +152,8 @@
 struct bq24190_dev_info {
 	struct i2c_client		*client;
 	struct device			*dev;
-	struct power_supply		charger;
-	struct power_supply		battery;
+	struct power_supply		*charger;
+	struct power_supply		*battery;
 	char				model_name[I2C_NAME_SIZE];
 	kernel_ulong_t			model;
 	unsigned int			gpio_int;
@@ -423,8 +423,7 @@
 		struct device_attribute *attr, char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, charger);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	struct bq24190_sysfs_field_info *info;
 	int ret;
 	u8 v;
@@ -444,8 +443,7 @@
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, charger);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	struct bq24190_sysfs_field_info *info;
 	int ret;
 	u8 v;
@@ -469,13 +467,13 @@
 {
 	bq24190_sysfs_init_attrs();
 
-	return sysfs_create_group(&bdi->charger.dev->kobj,
+	return sysfs_create_group(&bdi->charger->dev.kobj,
 			&bq24190_sysfs_attr_group);
 }
 
 static void bq24190_sysfs_remove_group(struct bq24190_dev_info *bdi)
 {
-	sysfs_remove_group(&bdi->charger.dev->kobj, &bq24190_sysfs_attr_group);
+	sysfs_remove_group(&bdi->charger->dev.kobj, &bq24190_sysfs_attr_group);
 }
 #else
 static int bq24190_sysfs_create_group(struct bq24190_dev_info *bdi)
@@ -807,8 +805,7 @@
 static int bq24190_charger_get_property(struct power_supply *psy,
 		enum power_supply_property psp, union power_supply_propval *val)
 {
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, charger);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
@@ -861,8 +858,7 @@
 		enum power_supply_property psp,
 		const union power_supply_propval *val)
 {
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, charger);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
@@ -922,18 +918,15 @@
 	"main-battery",
 };
 
-static void bq24190_charger_init(struct power_supply *charger)
-{
-	charger->name = "bq24190-charger";
-	charger->type = POWER_SUPPLY_TYPE_USB;
-	charger->properties = bq24190_charger_properties;
-	charger->num_properties = ARRAY_SIZE(bq24190_charger_properties);
-	charger->supplied_to = bq24190_charger_supplied_to;
-	charger->num_supplicants = ARRAY_SIZE(bq24190_charger_supplied_to);
-	charger->get_property = bq24190_charger_get_property;
-	charger->set_property = bq24190_charger_set_property;
-	charger->property_is_writeable = bq24190_charger_property_is_writeable;
-}
+static const struct power_supply_desc bq24190_charger_desc = {
+	.name			= "bq24190-charger",
+	.type			= POWER_SUPPLY_TYPE_USB,
+	.properties		= bq24190_charger_properties,
+	.num_properties		= ARRAY_SIZE(bq24190_charger_properties),
+	.get_property		= bq24190_charger_get_property,
+	.set_property		= bq24190_charger_set_property,
+	.property_is_writeable	= bq24190_charger_property_is_writeable,
+};
 
 /* Battery power supply property routines */
 
@@ -1102,8 +1095,7 @@
 static int bq24190_battery_get_property(struct power_supply *psy,
 		enum power_supply_property psp, union power_supply_propval *val)
 {
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, battery);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
@@ -1144,8 +1136,7 @@
 		enum power_supply_property psp,
 		const union power_supply_propval *val)
 {
-	struct bq24190_dev_info *bdi =
-			container_of(psy, struct bq24190_dev_info, battery);
+	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	int ret;
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
@@ -1193,16 +1184,15 @@
 	POWER_SUPPLY_PROP_SCOPE,
 };
 
-static void bq24190_battery_init(struct power_supply *battery)
-{
-	battery->name = "bq24190-battery";
-	battery->type = POWER_SUPPLY_TYPE_BATTERY;
-	battery->properties = bq24190_battery_properties;
-	battery->num_properties = ARRAY_SIZE(bq24190_battery_properties);
-	battery->get_property = bq24190_battery_get_property;
-	battery->set_property = bq24190_battery_set_property;
-	battery->property_is_writeable = bq24190_battery_property_is_writeable;
-}
+static const struct power_supply_desc bq24190_battery_desc = {
+	.name			= "bq24190-battery",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= bq24190_battery_properties,
+	.num_properties		= ARRAY_SIZE(bq24190_battery_properties),
+	.get_property		= bq24190_battery_get_property,
+	.set_property		= bq24190_battery_set_property,
+	.property_is_writeable	= bq24190_battery_property_is_writeable,
+};
 
 static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
 {
@@ -1269,8 +1259,8 @@
 	 * interrupt received).
 	 */
 	if (alert_userspace && !bdi->first_time) {
-		power_supply_changed(&bdi->charger);
-		power_supply_changed(&bdi->battery);
+		power_supply_changed(bdi->charger);
+		power_supply_changed(bdi->battery);
 		bdi->first_time = false;
 	}
 
@@ -1362,6 +1352,7 @@
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct device *dev = &client->dev;
 	struct bq24190_platform_data *pdata = client->dev.platform_data;
+	struct power_supply_config charger_cfg = {}, battery_cfg = {};
 	struct bq24190_dev_info *bdi;
 	int ret;
 
@@ -1416,19 +1407,23 @@
 		goto out2;
 	}
 
-	bq24190_charger_init(&bdi->charger);
-
-	ret = power_supply_register(dev, &bdi->charger);
-	if (ret) {
+	charger_cfg.drv_data = bdi;
+	charger_cfg.supplied_to = bq24190_charger_supplied_to;
+	charger_cfg.num_supplicants = ARRAY_SIZE(bq24190_charger_supplied_to),
+	bdi->charger = power_supply_register(dev, &bq24190_charger_desc,
+						&charger_cfg);
+	if (IS_ERR(bdi->charger)) {
 		dev_err(dev, "Can't register charger\n");
+		ret = PTR_ERR(bdi->charger);
 		goto out2;
 	}
 
-	bq24190_battery_init(&bdi->battery);
-
-	ret = power_supply_register(dev, &bdi->battery);
-	if (ret) {
+	battery_cfg.drv_data = bdi;
+	bdi->battery = power_supply_register(dev, &bq24190_battery_desc,
+						&battery_cfg);
+	if (IS_ERR(bdi->battery)) {
 		dev_err(dev, "Can't register battery\n");
+		ret = PTR_ERR(bdi->battery);
 		goto out3;
 	}
 
@@ -1441,9 +1436,9 @@
 	return 0;
 
 out4:
-	power_supply_unregister(&bdi->battery);
+	power_supply_unregister(bdi->battery);
 out3:
-	power_supply_unregister(&bdi->charger);
+	power_supply_unregister(bdi->charger);
 out2:
 	pm_runtime_disable(dev);
 out1:
@@ -1462,8 +1457,8 @@
 	pm_runtime_put_sync(bdi->dev);
 
 	bq24190_sysfs_remove_group(bdi);
-	power_supply_unregister(&bdi->battery);
-	power_supply_unregister(&bdi->charger);
+	power_supply_unregister(bdi->battery);
+	power_supply_unregister(bdi->charger);
 	pm_runtime_disable(bdi->dev);
 
 	if (bdi->gpio_int)
@@ -1499,8 +1494,8 @@
 	pm_runtime_put_sync(bdi->dev);
 
 	/* Things may have changed while suspended so alert upper layer */
-	power_supply_changed(&bdi->charger);
-	power_supply_changed(&bdi->battery);
+	power_supply_changed(bdi->charger);
+	power_supply_changed(bdi->battery);
 
 	return 0;
 }
diff --git a/drivers/power/bq24735-charger.c b/drivers/power/bq24735-charger.c
index d022b82..961a189 100644
--- a/drivers/power/bq24735-charger.c
+++ b/drivers/power/bq24735-charger.c
@@ -44,14 +44,15 @@
 #define BQ24735_DEVICE_ID		0xff
 
 struct bq24735 {
-	struct power_supply	charger;
-	struct i2c_client	*client;
-	struct bq24735_platform	*pdata;
+	struct power_supply		*charger;
+	struct power_supply_desc	charger_desc;
+	struct i2c_client		*client;
+	struct bq24735_platform		*pdata;
 };
 
 static inline struct bq24735 *to_bq24735(struct power_supply *psy)
 {
-	return container_of(psy, struct bq24735, charger);
+	return power_supply_get_drvdata(psy);
 }
 
 static enum power_supply_property bq24735_charger_properties[] = {
@@ -192,9 +193,7 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct bq24735 *charger;
-
-	charger = container_of(psy, struct bq24735, charger);
+	struct bq24735 *charger = to_bq24735(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ONLINE:
@@ -248,7 +247,8 @@
 {
 	int ret;
 	struct bq24735 *charger;
-	struct power_supply *supply;
+	struct power_supply_desc *supply_desc;
+	struct power_supply_config psy_cfg = {};
 	char *name;
 
 	charger = devm_kzalloc(&client->dev, sizeof(*charger), GFP_KERNEL);
@@ -277,16 +277,18 @@
 
 	charger->client = client;
 
-	supply = &charger->charger;
+	supply_desc = &charger->charger_desc;
 
-	supply->name = name;
-	supply->type = POWER_SUPPLY_TYPE_MAINS;
-	supply->properties = bq24735_charger_properties;
-	supply->num_properties = ARRAY_SIZE(bq24735_charger_properties);
-	supply->get_property = bq24735_charger_get_property;
-	supply->supplied_to = charger->pdata->supplied_to;
-	supply->num_supplicants = charger->pdata->num_supplicants;
-	supply->of_node = client->dev.of_node;
+	supply_desc->name = name;
+	supply_desc->type = POWER_SUPPLY_TYPE_MAINS;
+	supply_desc->properties = bq24735_charger_properties;
+	supply_desc->num_properties = ARRAY_SIZE(bq24735_charger_properties);
+	supply_desc->get_property = bq24735_charger_get_property;
+
+	psy_cfg.supplied_to = charger->pdata->supplied_to;
+	psy_cfg.num_supplicants = charger->pdata->num_supplicants;
+	psy_cfg.of_node = client->dev.of_node;
+	psy_cfg.drv_data = charger;
 
 	i2c_set_clientdata(client, charger);
 
@@ -341,8 +343,10 @@
 		}
 	}
 
-	ret = power_supply_register(&client->dev, supply);
-	if (ret < 0) {
+	charger->charger = power_supply_register(&client->dev, supply_desc,
+						 &psy_cfg);
+	if (IS_ERR(charger->charger)) {
+		ret = PTR_ERR(charger->charger);
 		dev_err(&client->dev, "Failed to register power supply: %d\n",
 			ret);
 		goto err_free_name;
@@ -354,7 +358,8 @@
 						IRQF_TRIGGER_RISING |
 						IRQF_TRIGGER_FALLING |
 						IRQF_ONESHOT,
-						supply->name, supply);
+						supply_desc->name,
+						charger->charger);
 		if (ret) {
 			dev_err(&client->dev,
 				"Unable to register IRQ %d err %d\n",
@@ -365,7 +370,7 @@
 
 	return 0;
 err_unregister_supply:
-	power_supply_unregister(supply);
+	power_supply_unregister(charger->charger);
 err_free_name:
 	if (name != charger->pdata->name)
 		kfree(name);
@@ -381,10 +386,10 @@
 		devm_free_irq(&charger->client->dev, charger->client->irq,
 			      &charger->charger);
 
-	power_supply_unregister(&charger->charger);
+	power_supply_unregister(charger->charger);
 
-	if (charger->charger.name != charger->pdata->name)
-		kfree(charger->charger.name);
+	if (charger->charger_desc.name != charger->pdata->name)
+		kfree(charger->charger_desc.name);
 
 	return 0;
 }
diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
index b72ba7c..a57433d 100644
--- a/drivers/power/bq27x00_battery.c
+++ b/drivers/power/bq27x00_battery.c
@@ -16,14 +16,12 @@
  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
- */
-
-/*
  * Datasheets:
  * http://focus.ti.com/docs/prod/folders/print/bq27000.html
  * http://focus.ti.com/docs/prod/folders/print/bq27500.html
  * http://www.ti.com/product/bq27425-g1
  * http://www.ti.com/product/BQ27742-G1
+ * http://www.ti.com/product/BQ27510-G3
  */
 
 #include <linux/device.h>
@@ -74,6 +72,10 @@
 
 #define BQ27742_POWER_AVG		0x76
 
+#define BQ27510_REG_SOC			0x20
+#define BQ27510_REG_DCAP		0x2E /* Design capacity */
+#define BQ27510_REG_CYCT		0x1E /* Cycle count total */
+
 /* bq27425 register addresses are same as bq27x00 addresses minus 4 */
 #define BQ27425_REG_OFFSET		0x04
 #define BQ27425_REG_SOC		(0x1C + BQ27425_REG_OFFSET)
@@ -87,7 +89,7 @@
 	int (*read)(struct bq27x00_device_info *di, u8 reg, bool single);
 };
 
-enum bq27x00_chip { BQ27000, BQ27500, BQ27425, BQ27742};
+enum bq27x00_chip { BQ27000, BQ27500, BQ27425, BQ27742, BQ27510};
 
 struct bq27x00_reg_cache {
 	int temperature;
@@ -114,7 +116,7 @@
 	unsigned long last_update;
 	struct delayed_work work;
 
-	struct power_supply	bat;
+	struct power_supply	*bat;
 
 	struct bq27x00_access_methods bus;
 
@@ -174,6 +176,24 @@
 	POWER_SUPPLY_PROP_HEALTH,
 };
 
+static enum power_supply_property bq27510_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CAPACITY_LEVEL,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
+	POWER_SUPPLY_PROP_POWER_AVG,
+	POWER_SUPPLY_PROP_HEALTH,
+};
+
 static unsigned int poll_interval = 360;
 module_param(poll_interval, uint, 0644);
 MODULE_PARM_DESC(poll_interval, "battery poll interval in seconds - " \
@@ -198,7 +218,8 @@
  */
 static bool bq27xxx_is_chip_version_higher(struct bq27x00_device_info *di)
 {
-	if (di->chip == BQ27425 || di->chip == BQ27500 || di->chip == BQ27742)
+	if (di->chip == BQ27425 || di->chip == BQ27500 || di->chip == BQ27742
+	    || di->chip == BQ27510)
 		return true;
 	return false;
 }
@@ -213,6 +234,8 @@
 
 	if (di->chip == BQ27500 || di->chip == BQ27742)
 		rsoc = bq27x00_read(di, BQ27500_REG_SOC, false);
+	else if (di->chip == BQ27510)
+		rsoc = bq27x00_read(di, BQ27510_REG_SOC, false);
 	else if (di->chip == BQ27425)
 		rsoc = bq27x00_read(di, BQ27425_REG_SOC, false);
 	else
@@ -286,6 +309,8 @@
 	if (bq27xxx_is_chip_version_higher(di)) {
 		if (di->chip == BQ27425)
 			ilmd = bq27x00_read(di, BQ27425_REG_DCAP, false);
+		else if (di->chip == BQ27510)
+			ilmd = bq27x00_read(di, BQ27510_REG_DCAP, false);
 		else
 			ilmd = bq27x00_read(di, BQ27500_REG_DCAP, false);
 	} else
@@ -354,7 +379,10 @@
 {
 	int cyct;
 
-	cyct = bq27x00_read(di, BQ27x00_REG_CYCT, false);
+	if (di->chip == BQ27510)
+		cyct = bq27x00_read(di, BQ27510_REG_CYCT, false);
+	else
+		cyct = bq27x00_read(di, BQ27x00_REG_CYCT, false);
 	if (cyct < 0)
 		dev_err(di->dev, "error reading cycle count total\n");
 
@@ -425,6 +453,10 @@
 		else
 			tval = POWER_SUPPLY_HEALTH_GOOD;
 		return tval;
+	} else if (di->chip == BQ27510) {
+		if (tval & BQ27500_FLAG_OTC)
+			return POWER_SUPPLY_HEALTH_OVERHEAT;
+		return POWER_SUPPLY_HEALTH_GOOD;
 	} else {
 		if (tval & BQ27000_FLAG_EDV1)
 			tval = POWER_SUPPLY_HEALTH_DEAD;
@@ -440,6 +472,7 @@
 {
 	struct bq27x00_reg_cache cache = {0, };
 	bool is_bq27500 = di->chip == BQ27500;
+	bool is_bq27510 = di->chip == BQ27510;
 	bool is_bq27425 = di->chip == BQ27425;
 	bool is_bq27742 = di->chip == BQ27742;
 	bool flags_1b = !(is_bq27500 || is_bq27742);
@@ -449,7 +482,7 @@
 		/* read error */
 		cache.flags = -1;
 	if (cache.flags >= 0) {
-		if (!is_bq27500 && !is_bq27425 && !is_bq27742
+		if (!is_bq27500 && !is_bq27425 && !is_bq27742 && !is_bq27510
 				&& (cache.flags & BQ27000_FLAG_CI)) {
 			dev_info(di->dev, "battery is not calibrated! ignoring capacity values\n");
 			cache.capacity = -ENODATA;
@@ -461,7 +494,7 @@
 			cache.health = -ENODATA;
 		} else {
 			cache.capacity = bq27x00_battery_read_rsoc(di);
-			if (is_bq27742)
+			if (is_bq27742 || is_bq27510)
 				cache.time_to_empty =
 					bq27x00_battery_read_time(di,
 							BQ27x00_REG_TTE);
@@ -498,7 +531,7 @@
 	}
 
 	if (di->cache.capacity != cache.capacity)
-		power_supply_changed(&di->bat);
+		power_supply_changed(di->bat);
 
 	if (memcmp(&di->cache, &cache, sizeof(cache)) != 0)
 		di->cache = cache;
@@ -570,7 +603,7 @@
 			status = POWER_SUPPLY_STATUS_FULL;
 		else if (di->cache.flags & BQ27000_FLAG_CHGS)
 			status = POWER_SUPPLY_STATUS_CHARGING;
-		else if (power_supply_am_i_supplied(&di->bat))
+		else if (power_supply_am_i_supplied(di->bat))
 			status = POWER_SUPPLY_STATUS_NOT_CHARGING;
 		else
 			status = POWER_SUPPLY_STATUS_DISCHARGING;
@@ -642,15 +675,12 @@
 	return 0;
 }
 
-#define to_bq27x00_device_info(x) container_of((x), \
-				struct bq27x00_device_info, bat);
-
 static int bq27x00_battery_get_property(struct power_supply *psy,
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
 	int ret = 0;
-	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
+	struct bq27x00_device_info *di = power_supply_get_drvdata(psy);
 
 	mutex_lock(&di->lock);
 	if (time_is_before_jiffies(di->last_update + 5 * HZ)) {
@@ -728,35 +758,47 @@
 
 static void bq27x00_external_power_changed(struct power_supply *psy)
 {
-	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
+	struct bq27x00_device_info *di = power_supply_get_drvdata(psy);
 
 	cancel_delayed_work_sync(&di->work);
 	schedule_delayed_work(&di->work, 0);
 }
 
-static int bq27x00_powersupply_init(struct bq27x00_device_info *di)
+static int bq27x00_powersupply_init(struct bq27x00_device_info *di,
+				    const char *name)
 {
 	int ret;
+	struct power_supply_desc *psy_desc;
+	struct power_supply_config psy_cfg = { .drv_data = di, };
 
-	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	psy_desc = devm_kzalloc(di->dev, sizeof(*psy_desc), GFP_KERNEL);
+	if (!psy_desc)
+		return -ENOMEM;
+
+	psy_desc->name = name;
+	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
 	if (di->chip == BQ27425) {
-		di->bat.properties = bq27425_battery_props;
-		di->bat.num_properties = ARRAY_SIZE(bq27425_battery_props);
+		psy_desc->properties = bq27425_battery_props;
+		psy_desc->num_properties = ARRAY_SIZE(bq27425_battery_props);
 	} else if (di->chip == BQ27742) {
-		di->bat.properties = bq27742_battery_props;
-		di->bat.num_properties = ARRAY_SIZE(bq27742_battery_props);
+		psy_desc->properties = bq27742_battery_props;
+		psy_desc->num_properties = ARRAY_SIZE(bq27742_battery_props);
+	} else if (di->chip == BQ27510) {
+		psy_desc->properties = bq27510_battery_props;
+		psy_desc->num_properties = ARRAY_SIZE(bq27510_battery_props);
 	} else {
-		di->bat.properties = bq27x00_battery_props;
-		di->bat.num_properties = ARRAY_SIZE(bq27x00_battery_props);
+		psy_desc->properties = bq27x00_battery_props;
+		psy_desc->num_properties = ARRAY_SIZE(bq27x00_battery_props);
 	}
-	di->bat.get_property = bq27x00_battery_get_property;
-	di->bat.external_power_changed = bq27x00_external_power_changed;
+	psy_desc->get_property = bq27x00_battery_get_property;
+	psy_desc->external_power_changed = bq27x00_external_power_changed;
 
 	INIT_DELAYED_WORK(&di->work, bq27x00_battery_poll);
 	mutex_init(&di->lock);
 
-	ret = power_supply_register(di->dev, &di->bat);
-	if (ret) {
+	di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg);
+	if (IS_ERR(di->bat)) {
+		ret = PTR_ERR(di->bat);
 		dev_err(di->dev, "failed to register battery: %d\n", ret);
 		return ret;
 	}
@@ -780,7 +822,7 @@
 
 	cancel_delayed_work_sync(&di->work);
 
-	power_supply_unregister(&di->bat);
+	power_supply_unregister(di->bat);
 
 	mutex_destroy(&di->lock);
 }
@@ -844,37 +886,34 @@
 	if (num < 0)
 		return num;
 
-	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
+	name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num);
 	if (!name) {
 		dev_err(&client->dev, "failed to allocate device name\n");
 		retval = -ENOMEM;
-		goto batt_failed_1;
+		goto batt_failed;
 	}
 
 	di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL);
 	if (!di) {
 		dev_err(&client->dev, "failed to allocate device info data\n");
 		retval = -ENOMEM;
-		goto batt_failed_2;
+		goto batt_failed;
 	}
 
 	di->id = num;
 	di->dev = &client->dev;
 	di->chip = id->driver_data;
-	di->bat.name = name;
 	di->bus.read = &bq27x00_read_i2c;
 
-	retval = bq27x00_powersupply_init(di);
+	retval = bq27x00_powersupply_init(di, name);
 	if (retval)
-		goto batt_failed_2;
+		goto batt_failed;
 
 	i2c_set_clientdata(client, di);
 
 	return 0;
 
-batt_failed_2:
-	kfree(name);
-batt_failed_1:
+batt_failed:
 	mutex_lock(&battery_mutex);
 	idr_remove(&battery_id, num);
 	mutex_unlock(&battery_mutex);
@@ -888,8 +927,6 @@
 
 	bq27x00_powersupply_unregister(di);
 
-	kfree(di->bat.name);
-
 	mutex_lock(&battery_mutex);
 	idr_remove(&battery_id, di->id);
 	mutex_unlock(&battery_mutex);
@@ -902,6 +939,7 @@
 	{ "bq27500", BQ27500 },
 	{ "bq27425", BQ27425 },
 	{ "bq27742", BQ27742 },
+	{ "bq27510", BQ27510 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, bq27x00_id);
@@ -977,6 +1015,7 @@
 {
 	struct bq27x00_device_info *di;
 	struct bq27000_platform_data *pdata = pdev->dev.platform_data;
+	const char *name;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "no platform_data supplied\n");
@@ -999,10 +1038,10 @@
 	di->dev = &pdev->dev;
 	di->chip = BQ27000;
 
-	di->bat.name = pdata->name ?: dev_name(&pdev->dev);
+	name = pdata->name ?: dev_name(&pdev->dev);
 	di->bus.read = &bq27000_read_platform;
 
-	return bq27x00_powersupply_init(di);
+	return bq27x00_powersupply_init(di, name);
 }
 
 static int bq27000_battery_remove(struct platform_device *pdev)
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 14b0d85..0aed13f 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -103,10 +103,11 @@
 		if (!psy)
 			break;
 
-		ret = psy->get_property(psy,
-				POWER_SUPPLY_PROP_PRESENT, &val);
+		ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_PRESENT,
+				&val);
 		if (ret == 0 && val.intval)
 			present = true;
+		power_supply_put(psy);
 		break;
 	case CM_CHARGER_STAT:
 		for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
@@ -118,8 +119,9 @@
 				continue;
 			}
 
-			ret = psy->get_property(psy, POWER_SUPPLY_PROP_PRESENT,
-					&val);
+			ret = power_supply_get_property(psy,
+				POWER_SUPPLY_PROP_PRESENT, &val);
+			power_supply_put(psy);
 			if (ret == 0 && val.intval) {
 				present = true;
 				break;
@@ -155,7 +157,9 @@
 			continue;
 		}
 
-		ret = psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);
+		ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_ONLINE,
+				&val);
+		power_supply_put(psy);
 		if (ret == 0 && val.intval) {
 			online = true;
 			break;
@@ -183,8 +187,9 @@
 	if (!fuel_gauge)
 		return -ENODEV;
 
-	ret = fuel_gauge->get_property(fuel_gauge,
+	ret = power_supply_get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);
+	power_supply_put(fuel_gauge);
 	if (ret)
 		return ret;
 
@@ -223,20 +228,26 @@
 		}
 
 		/* 2. The charger should be online (ext-power) */
-		ret = psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);
+		ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_ONLINE,
+				&val);
 		if (ret) {
 			dev_warn(cm->dev, "Cannot read ONLINE value from %s\n",
 				 cm->desc->psy_charger_stat[i]);
+			power_supply_put(psy);
 			continue;
 		}
-		if (val.intval == 0)
+		if (val.intval == 0) {
+			power_supply_put(psy);
 			continue;
+		}
 
 		/*
 		 * 3. The charger should not be FULL, DISCHARGING,
 		 * or NOT_CHARGING.
 		 */
-		ret = psy->get_property(psy, POWER_SUPPLY_PROP_STATUS, &val);
+		ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_STATUS,
+				&val);
+		power_supply_put(psy);
 		if (ret) {
 			dev_warn(cm->dev, "Cannot read STATUS value from %s\n",
 				 cm->desc->psy_charger_stat[i]);
@@ -264,6 +275,7 @@
 	struct charger_desc *desc = cm->desc;
 	union power_supply_propval val;
 	struct power_supply *fuel_gauge;
+	bool is_full = false;
 	int ret = 0;
 	int uV;
 
@@ -279,30 +291,38 @@
 		val.intval = 0;
 
 		/* Not full if capacity of fuel gauge isn't full */
-		ret = fuel_gauge->get_property(fuel_gauge,
+		ret = power_supply_get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CHARGE_FULL, &val);
-		if (!ret && val.intval > desc->fullbatt_full_capacity)
-			return true;
+		if (!ret && val.intval > desc->fullbatt_full_capacity) {
+			is_full = true;
+			goto out;
+		}
 	}
 
 	/* Full, if it's over the fullbatt voltage */
 	if (desc->fullbatt_uV > 0) {
 		ret = get_batt_uV(cm, &uV);
-		if (!ret && uV >= desc->fullbatt_uV)
-			return true;
+		if (!ret && uV >= desc->fullbatt_uV) {
+			is_full = true;
+			goto out;
+		}
 	}
 
 	/* Full, if the capacity is more than fullbatt_soc */
 	if (desc->fullbatt_soc > 0) {
 		val.intval = 0;
 
-		ret = fuel_gauge->get_property(fuel_gauge,
+		ret = power_supply_get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CAPACITY, &val);
-		if (!ret && val.intval >= desc->fullbatt_soc)
-			return true;
+		if (!ret && val.intval >= desc->fullbatt_soc) {
+			is_full = true;
+			goto out;
+		}
 	}
 
-	return false;
+out:
+	power_supply_put(fuel_gauge);
+	return is_full;
 }
 
 /**
@@ -575,14 +595,18 @@
 					int *temp)
 {
 	struct power_supply *fuel_gauge;
+	int ret;
 
 	fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
 	if (!fuel_gauge)
 		return -ENODEV;
 
-	return fuel_gauge->get_property(fuel_gauge,
+	ret = power_supply_get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_TEMP,
 				(union power_supply_propval *)temp);
+	power_supply_put(fuel_gauge);
+
+	return ret;
 }
 
 static int cm_get_battery_temperature(struct charger_manager *cm,
@@ -861,10 +885,9 @@
 		enum power_supply_property psp,
 		union power_supply_propval *val)
 {
-	struct charger_manager *cm = container_of(psy,
-			struct charger_manager, charger_psy);
+	struct charger_manager *cm = power_supply_get_drvdata(psy);
 	struct charger_desc *desc = cm->desc;
-	struct power_supply *fuel_gauge;
+	struct power_supply *fuel_gauge = NULL;
 	int ret = 0;
 	int uV;
 
@@ -900,26 +923,26 @@
 			ret = -ENODEV;
 			break;
 		}
-		ret = fuel_gauge->get_property(fuel_gauge,
+		ret = power_supply_get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CURRENT_NOW, val);
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
 	case POWER_SUPPLY_PROP_TEMP_AMBIENT:
 		return cm_get_battery_temperature(cm, &val->intval);
 	case POWER_SUPPLY_PROP_CAPACITY:
-		fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
-		if (!fuel_gauge) {
-			ret = -ENODEV;
-			break;
-		}
-
 		if (!is_batt_present(cm)) {
 			/* There is no battery. Assume 100% */
 			val->intval = 100;
 			break;
 		}
 
-		ret = fuel_gauge->get_property(fuel_gauge,
+		fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+		if (!fuel_gauge) {
+			ret = -ENODEV;
+			break;
+		}
+
+		ret = power_supply_get_property(fuel_gauge,
 					POWER_SUPPLY_PROP_CAPACITY, val);
 		if (ret)
 			break;
@@ -975,7 +998,7 @@
 				break;
 			}
 
-			ret = fuel_gauge->get_property(fuel_gauge,
+			ret = power_supply_get_property(fuel_gauge,
 						POWER_SUPPLY_PROP_CHARGE_NOW,
 						val);
 			if (ret) {
@@ -993,6 +1016,8 @@
 	default:
 		return -EINVAL;
 	}
+	if (fuel_gauge)
+		power_supply_put(fuel_gauge);
 	return ret;
 }
 
@@ -1015,7 +1040,7 @@
 	 */
 };
 
-static struct power_supply psy_default = {
+static const struct power_supply_desc psy_default = {
 	.name = "battery",
 	.type = POWER_SUPPLY_TYPE_BATTERY,
 	.properties = default_charger_props,
@@ -1396,7 +1421,7 @@
 		dev_info(cm->dev, "'%s' regulator's externally_control is %d\n",
 			 charger->regulator_name, charger->externally_control);
 
-		ret = sysfs_create_group(&cm->charger_psy.dev->kobj,
+		ret = sysfs_create_group(&cm->charger_psy->dev.kobj,
 					&charger->attr_g);
 		if (ret < 0) {
 			dev_err(cm->dev, "Cannot create sysfs entry of %s regulator\n",
@@ -1424,13 +1449,13 @@
 	int ret;
 
 	/* Verify whether fuel gauge provides battery temperature */
-	ret = fuel_gauge->get_property(fuel_gauge,
+	ret = power_supply_get_property(fuel_gauge,
 					POWER_SUPPLY_PROP_TEMP, &val);
 
 	if (!ret) {
-		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+		cm->charger_psy_desc.properties[cm->charger_psy_desc.num_properties] =
 				POWER_SUPPLY_PROP_TEMP;
-		cm->charger_psy.num_properties++;
+		cm->charger_psy_desc.num_properties++;
 		cm->desc->measure_battery_temp = true;
 	}
 #ifdef CONFIG_THERMAL
@@ -1441,9 +1466,9 @@
 			return PTR_ERR(cm->tzd_batt);
 
 		/* Use external thermometer */
-		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+		cm->charger_psy_desc.properties[cm->charger_psy_desc.num_properties] =
 				POWER_SUPPLY_PROP_TEMP_AMBIENT;
-		cm->charger_psy.num_properties++;
+		cm->charger_psy_desc.num_properties++;
 		cm->desc->measure_battery_temp = true;
 		ret = 0;
 	}
@@ -1459,7 +1484,7 @@
 	return ret;
 }
 
-static struct of_device_id charger_manager_match[] = {
+static const struct of_device_id charger_manager_match[] = {
 	{
 		.compatible = "charger-manager",
 	},
@@ -1603,6 +1628,7 @@
 	int j = 0;
 	union power_supply_propval val;
 	struct power_supply *fuel_gauge;
+	struct power_supply_config psy_cfg = {};
 
 	if (IS_ERR(desc)) {
 		dev_err(&pdev->dev, "No platform data (desc) found\n");
@@ -1617,6 +1643,7 @@
 	/* Basic Values. Unspecified are Null or 0 */
 	cm->dev = &pdev->dev;
 	cm->desc = desc;
+	psy_cfg.drv_data = cm;
 
 	/* Initialize alarm timer */
 	if (alarmtimer_get_rtcdev()) {
@@ -1672,13 +1699,7 @@
 				desc->psy_charger_stat[i]);
 			return -ENODEV;
 		}
-	}
-
-	fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
-	if (!fuel_gauge) {
-		dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
-			desc->psy_fuel_gauge);
-		return -ENODEV;
+		power_supply_put(psy);
 	}
 
 	if (desc->polling_interval_ms == 0 ||
@@ -1696,40 +1717,46 @@
 
 	platform_set_drvdata(pdev, cm);
 
-	memcpy(&cm->charger_psy, &psy_default, sizeof(psy_default));
+	memcpy(&cm->charger_psy_desc, &psy_default, sizeof(psy_default));
 
 	if (!desc->psy_name)
 		strncpy(cm->psy_name_buf, psy_default.name, PSY_NAME_MAX);
 	else
 		strncpy(cm->psy_name_buf, desc->psy_name, PSY_NAME_MAX);
-	cm->charger_psy.name = cm->psy_name_buf;
+	cm->charger_psy_desc.name = cm->psy_name_buf;
 
 	/* Allocate for psy properties because they may vary */
-	cm->charger_psy.properties = devm_kzalloc(&pdev->dev,
+	cm->charger_psy_desc.properties = devm_kzalloc(&pdev->dev,
 				sizeof(enum power_supply_property)
 				* (ARRAY_SIZE(default_charger_props) +
 				NUM_CHARGER_PSY_OPTIONAL), GFP_KERNEL);
-	if (!cm->charger_psy.properties)
+	if (!cm->charger_psy_desc.properties)
 		return -ENOMEM;
 
-	memcpy(cm->charger_psy.properties, default_charger_props,
+	memcpy(cm->charger_psy_desc.properties, default_charger_props,
 		sizeof(enum power_supply_property) *
 		ARRAY_SIZE(default_charger_props));
-	cm->charger_psy.num_properties = psy_default.num_properties;
+	cm->charger_psy_desc.num_properties = psy_default.num_properties;
 
 	/* Find which optional psy-properties are available */
-	if (!fuel_gauge->get_property(fuel_gauge,
-					  POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
-		cm->charger_psy.properties[cm->charger_psy.num_properties] =
-				POWER_SUPPLY_PROP_CHARGE_NOW;
-		cm->charger_psy.num_properties++;
+	fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
+	if (!fuel_gauge) {
+		dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
+			desc->psy_fuel_gauge);
+		return -ENODEV;
 	}
-	if (!fuel_gauge->get_property(fuel_gauge,
+	if (!power_supply_get_property(fuel_gauge,
+					  POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
+		cm->charger_psy_desc.properties[cm->charger_psy_desc.num_properties] =
+				POWER_SUPPLY_PROP_CHARGE_NOW;
+		cm->charger_psy_desc.num_properties++;
+	}
+	if (!power_supply_get_property(fuel_gauge,
 					  POWER_SUPPLY_PROP_CURRENT_NOW,
 					  &val)) {
-		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+		cm->charger_psy_desc.properties[cm->charger_psy_desc.num_properties] =
 				POWER_SUPPLY_PROP_CURRENT_NOW;
-		cm->charger_psy.num_properties++;
+		cm->charger_psy_desc.num_properties++;
 	}
 
 	ret = cm_init_thermal_data(cm, fuel_gauge);
@@ -1737,14 +1764,16 @@
 		dev_err(&pdev->dev, "Failed to initialize thermal data\n");
 		cm->desc->measure_battery_temp = false;
 	}
+	power_supply_put(fuel_gauge);
 
 	INIT_DELAYED_WORK(&cm->fullbatt_vchk_work, fullbatt_vchk);
 
-	ret = power_supply_register(NULL, &cm->charger_psy);
-	if (ret) {
+	cm->charger_psy = power_supply_register(NULL, &cm->charger_psy_desc,
+						&psy_cfg);
+	if (IS_ERR(cm->charger_psy)) {
 		dev_err(&pdev->dev, "Cannot register charger-manager with name \"%s\"\n",
-			cm->charger_psy.name);
-		return ret;
+			cm->charger_psy_desc.name);
+		return PTR_ERR(cm->charger_psy);
 	}
 
 	/* Register extcon device for charger cable */
@@ -1790,7 +1819,7 @@
 		struct charger_regulator *charger;
 
 		charger = &desc->charger_regulators[i];
-		sysfs_remove_group(&cm->charger_psy.dev->kobj,
+		sysfs_remove_group(&cm->charger_psy->dev.kobj,
 				&charger->attr_g);
 	}
 err_reg_extcon:
@@ -1808,7 +1837,7 @@
 		regulator_put(desc->charger_regulators[i].consumer);
 	}
 
-	power_supply_unregister(&cm->charger_psy);
+	power_supply_unregister(cm->charger_psy);
 
 	return ret;
 }
@@ -1840,7 +1869,7 @@
 	for (i = 0 ; i < desc->num_charger_regulators ; i++)
 		regulator_put(desc->charger_regulators[i].consumer);
 
-	power_supply_unregister(&cm->charger_psy);
+	power_supply_unregister(cm->charger_psy);
 
 	try_charger_enable(cm, false);
 
@@ -1999,7 +2028,7 @@
 	bool found = false;
 
 	for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
-		if (!strcmp(psy->name, cm->desc->psy_charger_stat[i])) {
+		if (!strcmp(psy->desc->name, cm->desc->psy_charger_stat[i])) {
 			found = true;
 			break;
 		}
diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c
index 594e4db..2da9ed8 100644
--- a/drivers/power/collie_battery.c
+++ b/drivers/power/collie_battery.c
@@ -30,7 +30,7 @@
 
 struct collie_bat {
 	int status;
-	struct power_supply psy;
+	struct power_supply *psy;
 	int full_chrg;
 
 	struct mutex work_lock; /* protects data */
@@ -98,7 +98,7 @@
 			    union power_supply_propval *val)
 {
 	int ret = 0;
-	struct collie_bat *bat = container_of(psy, struct collie_bat, psy);
+	struct collie_bat *bat = power_supply_get_drvdata(psy);
 
 	if (bat->is_present && !bat->is_present(bat)
 			&& psp != POWER_SUPPLY_PROP_PRESENT) {
@@ -155,14 +155,14 @@
 static void collie_bat_update(struct collie_bat *bat)
 {
 	int old;
-	struct power_supply *psy = &bat->psy;
+	struct power_supply *psy = bat->psy;
 
 	mutex_lock(&bat->work_lock);
 
 	old = bat->status;
 
 	if (bat->is_present && !bat->is_present(bat)) {
-		printk(KERN_NOTICE "%s not present\n", psy->name);
+		printk(KERN_NOTICE "%s not present\n", psy->desc->name);
 		bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
 		bat->full_chrg = -1;
 	} else if (power_supply_am_i_supplied(psy)) {
@@ -220,18 +220,20 @@
 	POWER_SUPPLY_PROP_PRESENT,
 };
 
+static const struct power_supply_desc collie_bat_main_desc = {
+	.name		= "main-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= collie_bat_main_props,
+	.num_properties	= ARRAY_SIZE(collie_bat_main_props),
+	.get_property	= collie_bat_get_property,
+	.external_power_changed = collie_bat_external_power_changed,
+	.use_for_apm	= 1,
+};
+
 static struct collie_bat collie_bat_main = {
 	.status = POWER_SUPPLY_STATUS_DISCHARGING,
 	.full_chrg = -1,
-	.psy = {
-		.name		= "main-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= collie_bat_main_props,
-		.num_properties	= ARRAY_SIZE(collie_bat_main_props),
-		.get_property	= collie_bat_get_property,
-		.external_power_changed = collie_bat_external_power_changed,
-		.use_for_apm	= 1,
-	},
+	.psy = NULL,
 
 	.gpio_full = COLLIE_GPIO_CO,
 	.gpio_charge_on = COLLIE_GPIO_CHARGE_ON,
@@ -249,18 +251,19 @@
 	.adc_temp_divider = 10000,
 };
 
+static const struct power_supply_desc collie_bat_bu_desc = {
+	.name		= "backup-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= collie_bat_bu_props,
+	.num_properties	= ARRAY_SIZE(collie_bat_bu_props),
+	.get_property	= collie_bat_get_property,
+	.external_power_changed = collie_bat_external_power_changed,
+};
+
 static struct collie_bat collie_bat_bu = {
 	.status = POWER_SUPPLY_STATUS_UNKNOWN,
 	.full_chrg = -1,
-
-	.psy = {
-		.name		= "backup-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= collie_bat_bu_props,
-		.num_properties	= ARRAY_SIZE(collie_bat_bu_props),
-		.get_property	= collie_bat_get_property,
-		.external_power_changed = collie_bat_external_power_changed,
-	},
+	.psy = NULL,
 
 	.gpio_full = -1,
 	.gpio_charge_on = -1,
@@ -319,6 +322,7 @@
 static int collie_bat_probe(struct ucb1x00_dev *dev)
 {
 	int ret;
+	struct power_supply_config psy_main_cfg = {}, psy_bu_cfg = {};
 
 	if (!machine_is_collie())
 		return -ENODEV;
@@ -334,12 +338,23 @@
 
 	INIT_WORK(&bat_work, collie_bat_work);
 
-	ret = power_supply_register(&dev->ucb->dev, &collie_bat_main.psy);
-	if (ret)
+	psy_main_cfg.drv_data = &collie_bat_main;
+	collie_bat_main.psy = power_supply_register(&dev->ucb->dev,
+						    &collie_bat_main_desc,
+						    &psy_main_cfg);
+	if (IS_ERR(collie_bat_main.psy)) {
+		ret = PTR_ERR(collie_bat_main.psy);
 		goto err_psy_reg_main;
-	ret = power_supply_register(&dev->ucb->dev, &collie_bat_bu.psy);
-	if (ret)
+	}
+
+	psy_main_cfg.drv_data = &collie_bat_bu;
+	collie_bat_bu.psy = power_supply_register(&dev->ucb->dev,
+						  &collie_bat_bu_desc,
+						  &psy_bu_cfg);
+	if (IS_ERR(collie_bat_bu.psy)) {
+		ret = PTR_ERR(collie_bat_bu.psy);
 		goto err_psy_reg_bu;
+	}
 
 	ret = request_irq(gpio_to_irq(COLLIE_GPIO_CO),
 				collie_bat_gpio_isr,
@@ -354,9 +369,9 @@
 	return 0;
 
 err_irq:
-	power_supply_unregister(&collie_bat_bu.psy);
+	power_supply_unregister(collie_bat_bu.psy);
 err_psy_reg_bu:
-	power_supply_unregister(&collie_bat_main.psy);
+	power_supply_unregister(collie_bat_main.psy);
 err_psy_reg_main:
 
 	/* see comment in collie_bat_remove */
@@ -369,8 +384,8 @@
 {
 	free_irq(gpio_to_irq(COLLIE_GPIO_CO), &collie_bat_main);
 
-	power_supply_unregister(&collie_bat_bu.psy);
-	power_supply_unregister(&collie_bat_main.psy);
+	power_supply_unregister(collie_bat_bu.psy);
+	power_supply_unregister(collie_bat_main.psy);
 
 	/*
 	 * Now cancel the bat_work.  We won't get any more schedules,
diff --git a/drivers/power/da9030_battery.c b/drivers/power/da9030_battery.c
index 78cd5d6..5ca0f4d 100644
--- a/drivers/power/da9030_battery.c
+++ b/drivers/power/da9030_battery.c
@@ -89,7 +89,8 @@
 };
 
 struct da9030_charger {
-	struct power_supply psy;
+	struct power_supply *psy;
+	struct power_supply_desc psy_desc;
 
 	struct device *master;
 
@@ -245,7 +246,7 @@
 
 	da903x_write(charger->master, DA9030_CHARGE_CONTROL, val);
 
-	power_supply_changed(&charger->psy);
+	power_supply_changed(charger->psy);
 }
 
 static void da9030_charger_check_state(struct da9030_charger *charger)
@@ -341,8 +342,7 @@
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
 {
-	struct da9030_charger *charger;
-	charger = container_of(psy, struct da9030_charger, psy);
+	struct da9030_charger *charger = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -447,16 +447,16 @@
 
 static void da9030_battery_setup_psy(struct da9030_charger *charger)
 {
-	struct power_supply *psy = &charger->psy;
+	struct power_supply_desc *psy_desc = &charger->psy_desc;
 	struct power_supply_info *info = charger->battery_info;
 
-	psy->name = info->name;
-	psy->use_for_apm = info->use_for_apm;
-	psy->type = POWER_SUPPLY_TYPE_BATTERY;
-	psy->get_property = da9030_battery_get_property;
+	psy_desc->name = info->name;
+	psy_desc->use_for_apm = info->use_for_apm;
+	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+	psy_desc->get_property = da9030_battery_get_property;
 
-	psy->properties = da9030_battery_props;
-	psy->num_properties = ARRAY_SIZE(da9030_battery_props);
+	psy_desc->properties = da9030_battery_props;
+	psy_desc->num_properties = ARRAY_SIZE(da9030_battery_props);
 };
 
 static int da9030_battery_charger_init(struct da9030_charger *charger)
@@ -494,6 +494,7 @@
 static int da9030_battery_probe(struct platform_device *pdev)
 {
 	struct da9030_charger *charger;
+	struct power_supply_config psy_cfg = {};
 	struct da9030_battery_info *pdata = pdev->dev.platform_data;
 	int ret;
 
@@ -541,9 +542,13 @@
 		goto err_notifier;
 
 	da9030_battery_setup_psy(charger);
-	ret = power_supply_register(&pdev->dev, &charger->psy);
-	if (ret)
+	psy_cfg.drv_data = charger;
+	charger->psy = power_supply_register(&pdev->dev, &charger->psy_desc,
+					     &psy_cfg);
+	if (IS_ERR(charger->psy)) {
+		ret = PTR_ERR(charger->psy);
 		goto err_ps_register;
+	}
 
 	charger->debug_file = da9030_bat_create_debugfs(charger);
 	platform_set_drvdata(pdev, charger);
@@ -571,7 +576,7 @@
 				   DA9030_EVENT_CHIOVER | DA9030_EVENT_TBAT);
 	cancel_delayed_work_sync(&charger->work);
 	da9030_set_charge(charger, 0);
-	power_supply_unregister(&charger->psy);
+	power_supply_unregister(charger->psy);
 
 	return 0;
 }
diff --git a/drivers/power/da9052-battery.c b/drivers/power/da9052-battery.c
index d17250f..830ec46 100644
--- a/drivers/power/da9052-battery.c
+++ b/drivers/power/da9052-battery.c
@@ -169,7 +169,7 @@
 
 struct da9052_battery {
 	struct da9052 *da9052;
-	struct power_supply psy;
+	struct power_supply *psy;
 	struct notifier_block nb;
 	int charger_type;
 	int status;
@@ -452,7 +452,7 @@
 
 	if (irq == DA9052_IRQ_CHGEND || irq == DA9052_IRQ_DCIN ||
 	    irq == DA9052_IRQ_VBUS || irq == DA9052_IRQ_TBAT) {
-		power_supply_changed(&bat->psy);
+		power_supply_changed(bat->psy);
 	}
 
 	return IRQ_HANDLED;
@@ -499,8 +499,7 @@
 {
 	int ret;
 	int illegal;
-	struct da9052_battery *bat = container_of(psy, struct da9052_battery,
-						  psy);
+	struct da9052_battery *bat = power_supply_get_drvdata(psy);
 
 	ret = da9052_bat_check_presence(bat, &illegal);
 	if (ret < 0)
@@ -561,7 +560,7 @@
 	POWER_SUPPLY_PROP_TECHNOLOGY,
 };
 
-static struct power_supply template_battery = {
+static struct power_supply_desc psy_desc = {
 	.name		= "da9052-bat",
 	.type		= POWER_SUPPLY_TYPE_BATTERY,
 	.properties	= da9052_bat_props,
@@ -591,6 +590,7 @@
 {
 	struct da9052_pdata *pdata;
 	struct da9052_battery *bat;
+	struct power_supply_config psy_cfg = {};
 	int ret;
 	int i;
 
@@ -599,8 +599,9 @@
 	if (!bat)
 		return -ENOMEM;
 
+	psy_cfg.drv_data = bat;
+
 	bat->da9052 = dev_get_drvdata(pdev->dev.parent);
-	bat->psy = template_battery;
 	bat->charger_type = DA9052_NOCHARGER;
 	bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
 	bat->health = POWER_SUPPLY_HEALTH_UNKNOWN;
@@ -608,9 +609,9 @@
 
 	pdata = bat->da9052->dev->platform_data;
 	if (pdata != NULL && pdata->use_for_apm)
-		bat->psy.use_for_apm = pdata->use_for_apm;
+		psy_desc.use_for_apm = pdata->use_for_apm;
 	else
-		bat->psy.use_for_apm = 1;
+		psy_desc.use_for_apm = 1;
 
 	for (i = 0; i < ARRAY_SIZE(da9052_bat_irqs); i++) {
 		ret = da9052_request_irq(bat->da9052,
@@ -625,9 +626,11 @@
 		}
 	}
 
-	ret = power_supply_register(&pdev->dev, &bat->psy);
-	 if (ret)
+	bat->psy = power_supply_register(&pdev->dev, &psy_desc, &psy_cfg);
+	if (IS_ERR(bat->psy)) {
+		ret = PTR_ERR(bat->psy);
 		goto err;
+	}
 
 	platform_set_drvdata(pdev, bat);
 	return 0;
@@ -646,7 +649,7 @@
 	for (i = 0; i < ARRAY_SIZE(da9052_bat_irqs); i++)
 		da9052_free_irq(bat->da9052, da9052_bat_irq_bits[i], bat);
 
-	power_supply_unregister(&bat->psy);
+	power_supply_unregister(bat->psy);
 
 	return 0;
 }
diff --git a/drivers/power/da9150-charger.c b/drivers/power/da9150-charger.c
new file mode 100644
index 0000000..6009981
--- /dev/null
+++ b/drivers/power/da9150-charger.c
@@ -0,0 +1,694 @@
+/*
+ * DA9150 Charger Driver
+ *
+ * Copyright (c) 2014 Dialog Semiconductor
+ *
+ * Author: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/power_supply.h>
+#include <linux/notifier.h>
+#include <linux/usb/phy.h>
+#include <linux/iio/consumer.h>
+#include <linux/mfd/da9150/core.h>
+#include <linux/mfd/da9150/registers.h>
+
+/* Private data */
+struct da9150_charger {
+	struct da9150 *da9150;
+	struct device *dev;
+
+	struct power_supply *usb;
+	struct power_supply *battery;
+	struct power_supply *supply_online;
+
+	struct usb_phy *usb_phy;
+	struct notifier_block otg_nb;
+	struct work_struct otg_work;
+	unsigned long usb_event;
+
+	struct iio_channel *ibus_chan;
+	struct iio_channel *vbus_chan;
+	struct iio_channel *tjunc_chan;
+	struct iio_channel *vbat_chan;
+};
+
+static inline int da9150_charger_supply_online(struct da9150_charger *charger,
+					       struct power_supply *psy,
+					       union power_supply_propval *val)
+{
+	val->intval = (psy == charger->supply_online) ? 1 : 0;
+
+	return 0;
+}
+
+/* Charger Properties */
+static int da9150_charger_vbus_voltage_now(struct da9150_charger *charger,
+					   union power_supply_propval *val)
+{
+	int v_val, ret;
+
+	/* Read processed value - mV units */
+	ret = iio_read_channel_processed(charger->vbus_chan, &v_val);
+	if (ret < 0)
+		return ret;
+
+	/* Convert voltage to expected uV units */
+	val->intval = v_val * 1000;
+
+	return 0;
+}
+
+static int da9150_charger_ibus_current_avg(struct da9150_charger *charger,
+					   union power_supply_propval *val)
+{
+	int i_val, ret;
+
+	/* Read processed value - mA units */
+	ret = iio_read_channel_processed(charger->ibus_chan, &i_val);
+	if (ret < 0)
+		return ret;
+
+	/* Convert current to expected uA units */
+	val->intval = i_val * 1000;
+
+	return 0;
+}
+
+static int da9150_charger_tjunc_temp(struct da9150_charger *charger,
+				     union power_supply_propval *val)
+{
+	int t_val, ret;
+
+	/* Read processed value - 0.001 degrees C units */
+	ret = iio_read_channel_processed(charger->tjunc_chan, &t_val);
+	if (ret < 0)
+		return ret;
+
+	/* Convert temp to expect 0.1 degrees C units */
+	val->intval = t_val / 100;
+
+	return 0;
+}
+
+static enum power_supply_property da9150_charger_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_TEMP,
+};
+
+static int da9150_charger_get_prop(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct da9150_charger *charger = dev_get_drvdata(psy->dev.parent);
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = da9150_charger_supply_online(charger, psy, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = da9150_charger_vbus_voltage_now(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_AVG:
+		ret = da9150_charger_ibus_current_avg(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		ret = da9150_charger_tjunc_temp(charger, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/* Battery Properties */
+static int da9150_charger_battery_status(struct da9150_charger *charger,
+					 union power_supply_propval *val)
+{
+	u8 reg;
+
+	/* Check to see if battery is discharging */
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_H);
+
+	if (((reg & DA9150_VBUS_STAT_MASK) == DA9150_VBUS_STAT_OFF) ||
+	    ((reg & DA9150_VBUS_STAT_MASK) == DA9150_VBUS_STAT_WAIT)) {
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+
+		return 0;
+	}
+
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_J);
+
+	/* Now check for other states */
+	switch (reg & DA9150_CHG_STAT_MASK) {
+	case DA9150_CHG_STAT_ACT:
+	case DA9150_CHG_STAT_PRE:
+	case DA9150_CHG_STAT_CC:
+	case DA9150_CHG_STAT_CV:
+		val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		break;
+	case DA9150_CHG_STAT_OFF:
+	case DA9150_CHG_STAT_SUSP:
+	case DA9150_CHG_STAT_TEMP:
+	case DA9150_CHG_STAT_TIME:
+	case DA9150_CHG_STAT_BAT:
+		val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+	case DA9150_CHG_STAT_FULL:
+		val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	default:
+		val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		break;
+	}
+
+	return 0;
+}
+
+static int da9150_charger_battery_health(struct da9150_charger *charger,
+					 union power_supply_propval *val)
+{
+	u8 reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_J);
+
+	/* Check if temperature limit reached */
+	switch (reg & DA9150_CHG_TEMP_MASK) {
+	case DA9150_CHG_TEMP_UNDER:
+		val->intval = POWER_SUPPLY_HEALTH_COLD;
+		return 0;
+	case DA9150_CHG_TEMP_OVER:
+		val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		return 0;
+	default:
+		break;
+	}
+
+	/* Check for other health states */
+	switch (reg & DA9150_CHG_STAT_MASK) {
+	case DA9150_CHG_STAT_ACT:
+	case DA9150_CHG_STAT_PRE:
+		val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		break;
+	case DA9150_CHG_STAT_TIME:
+		val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		break;
+	default:
+		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	}
+
+	return 0;
+}
+
+static int da9150_charger_battery_present(struct da9150_charger *charger,
+					  union power_supply_propval *val)
+{
+	u8 reg;
+
+	/* Check if battery present or removed */
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_J);
+	if ((reg & DA9150_CHG_STAT_MASK) == DA9150_CHG_STAT_BAT)
+		val->intval = 0;
+	else
+		val->intval = 1;
+
+	return 0;
+}
+
+static int da9150_charger_battery_charge_type(struct da9150_charger *charger,
+					      union power_supply_propval *val)
+{
+	u8 reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_J);
+
+	switch (reg & DA9150_CHG_STAT_MASK) {
+	case DA9150_CHG_STAT_CC:
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
+		break;
+	case DA9150_CHG_STAT_ACT:
+	case DA9150_CHG_STAT_PRE:
+	case DA9150_CHG_STAT_CV:
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+		break;
+	default:
+		val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		break;
+	}
+
+	return 0;
+}
+
+static int da9150_charger_battery_voltage_min(struct da9150_charger *charger,
+					      union power_supply_propval *val)
+{
+	u8 reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_PPR_CHGCTRL_C);
+
+	/* Value starts at 2500 mV, 50 mV increments, presented in uV */
+	val->intval = ((reg & DA9150_CHG_VFAULT_MASK) * 50000) + 2500000;
+
+	return 0;
+}
+
+static int da9150_charger_battery_voltage_now(struct da9150_charger *charger,
+					      union power_supply_propval *val)
+{
+	int v_val, ret;
+
+	/* Read processed value - mV units */
+	ret = iio_read_channel_processed(charger->vbat_chan, &v_val);
+	if (ret < 0)
+		return ret;
+
+	val->intval = v_val * 1000;
+
+	return 0;
+}
+
+static int da9150_charger_battery_current_max(struct da9150_charger *charger,
+					      union power_supply_propval *val)
+{
+	int reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_PPR_CHGCTRL_D);
+
+	/* 25mA increments */
+	val->intval = reg * 25000;
+
+	return 0;
+}
+
+static int da9150_charger_battery_voltage_max(struct da9150_charger *charger,
+					      union power_supply_propval *val)
+{
+	u8 reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_PPR_CHGCTRL_B);
+
+	/* Value starts at 3650 mV, 25 mV increments, presented in uV */
+	val->intval = ((reg & DA9150_CHG_VBAT_MASK) * 25000) + 3650000;
+	return 0;
+}
+
+static enum power_supply_property da9150_charger_bat_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_CHARGE_TYPE,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
+};
+
+static int da9150_charger_battery_get_prop(struct power_supply *psy,
+					   enum power_supply_property psp,
+					   union power_supply_propval *val)
+{
+	struct da9150_charger *charger = dev_get_drvdata(psy->dev.parent);
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = da9150_charger_battery_status(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = da9150_charger_supply_online(charger, psy, val);
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = da9150_charger_battery_health(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		ret = da9150_charger_battery_present(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		ret = da9150_charger_battery_charge_type(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		ret = da9150_charger_battery_voltage_min(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = da9150_charger_battery_voltage_now(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		ret = da9150_charger_battery_current_max(charger, val);
+		break;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
+		ret = da9150_charger_battery_voltage_max(charger, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static irqreturn_t da9150_charger_chg_irq(int irq, void *data)
+{
+	struct da9150_charger *charger = data;
+
+	power_supply_changed(charger->battery);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t da9150_charger_tjunc_irq(int irq, void *data)
+{
+	struct da9150_charger *charger = data;
+
+	/* Nothing we can really do except report this. */
+	dev_crit(charger->dev, "TJunc over temperature!!!\n");
+	power_supply_changed(charger->usb);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t da9150_charger_vfault_irq(int irq, void *data)
+{
+	struct da9150_charger *charger = data;
+
+	/* Nothing we can really do except report this. */
+	dev_crit(charger->dev, "VSYS under voltage!!!\n");
+	power_supply_changed(charger->usb);
+	power_supply_changed(charger->battery);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t da9150_charger_vbus_irq(int irq, void *data)
+{
+	struct da9150_charger *charger = data;
+	u8 reg;
+
+	reg = da9150_reg_read(charger->da9150, DA9150_STATUS_H);
+
+	/* Charger plugged in or battery only */
+	switch (reg & DA9150_VBUS_STAT_MASK) {
+	case DA9150_VBUS_STAT_OFF:
+	case DA9150_VBUS_STAT_WAIT:
+		charger->supply_online = charger->battery;
+		break;
+	case DA9150_VBUS_STAT_CHG:
+		charger->supply_online = charger->usb;
+		break;
+	default:
+		dev_warn(charger->dev, "Unknown VBUS state - reg = 0x%x\n",
+			 reg);
+		charger->supply_online = NULL;
+		break;
+	}
+
+	power_supply_changed(charger->usb);
+	power_supply_changed(charger->battery);
+
+	return IRQ_HANDLED;
+}
+
+static void da9150_charger_otg_work(struct work_struct *data)
+{
+	struct da9150_charger *charger =
+		container_of(data, struct da9150_charger, otg_work);
+
+	switch (charger->usb_event) {
+	case USB_EVENT_ID:
+		/* Enable OTG Boost */
+		da9150_set_bits(charger->da9150, DA9150_PPR_BKCTRL_A,
+				DA9150_VBUS_MODE_MASK, DA9150_VBUS_MODE_OTG);
+		break;
+	case USB_EVENT_NONE:
+		/* Revert to charge mode */
+		power_supply_changed(charger->usb);
+		power_supply_changed(charger->battery);
+		da9150_set_bits(charger->da9150, DA9150_PPR_BKCTRL_A,
+				DA9150_VBUS_MODE_MASK, DA9150_VBUS_MODE_CHG);
+		break;
+	}
+}
+
+static int da9150_charger_otg_ncb(struct notifier_block *nb, unsigned long val,
+				  void *priv)
+{
+	struct da9150_charger *charger =
+		container_of(nb, struct da9150_charger, otg_nb);
+
+	dev_dbg(charger->dev, "DA9150 OTG notify %lu\n", val);
+
+	charger->usb_event = val;
+	schedule_work(&charger->otg_work);
+
+	return NOTIFY_OK;
+}
+
+static int da9150_charger_register_irq(struct platform_device *pdev,
+				       irq_handler_t handler,
+				       const char *irq_name)
+{
+	struct device *dev = &pdev->dev;
+	struct da9150_charger *charger = platform_get_drvdata(pdev);
+	int irq, ret;
+
+	irq = platform_get_irq_byname(pdev, irq_name);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get IRQ CHG_STATUS: %d\n", irq);
+		return irq;
+	}
+
+	ret = request_threaded_irq(irq, NULL, handler, IRQF_ONESHOT, irq_name,
+				   charger);
+	if (ret)
+		dev_err(dev, "Failed to request IRQ %d: %d\n", irq, ret);
+
+	return ret;
+}
+
+static void da9150_charger_unregister_irq(struct platform_device *pdev,
+					  const char *irq_name)
+{
+	struct device *dev = &pdev->dev;
+	struct da9150_charger *charger = platform_get_drvdata(pdev);
+	int irq;
+
+	irq = platform_get_irq_byname(pdev, irq_name);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get IRQ CHG_STATUS: %d\n", irq);
+		return;
+	}
+
+	free_irq(irq, charger);
+}
+
+static const struct power_supply_desc usb_desc = {
+	.name		= "da9150-usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= da9150_charger_props,
+	.num_properties	= ARRAY_SIZE(da9150_charger_props),
+	.get_property	= da9150_charger_get_prop,
+};
+
+static const struct power_supply_desc battery_desc = {
+	.name		= "da9150-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= da9150_charger_bat_props,
+	.num_properties	= ARRAY_SIZE(da9150_charger_bat_props),
+	.get_property	= da9150_charger_battery_get_prop,
+};
+
+static int da9150_charger_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct da9150 *da9150 = dev_get_drvdata(dev->parent);
+	struct da9150_charger *charger;
+	u8 reg;
+	int ret;
+
+	charger = devm_kzalloc(dev, sizeof(struct da9150_charger), GFP_KERNEL);
+	if (!charger)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, charger);
+	charger->da9150 = da9150;
+	charger->dev = dev;
+
+	/* Acquire ADC channels */
+	charger->ibus_chan = iio_channel_get(dev, "CHAN_IBUS");
+	if (IS_ERR(charger->ibus_chan)) {
+		ret = PTR_ERR(charger->ibus_chan);
+		goto ibus_chan_fail;
+	}
+
+	charger->vbus_chan = iio_channel_get(dev, "CHAN_VBUS");
+	if (IS_ERR(charger->vbus_chan)) {
+		ret = PTR_ERR(charger->vbus_chan);
+		goto vbus_chan_fail;
+	}
+
+	charger->tjunc_chan = iio_channel_get(dev, "CHAN_TJUNC");
+	if (IS_ERR(charger->tjunc_chan)) {
+		ret = PTR_ERR(charger->tjunc_chan);
+		goto tjunc_chan_fail;
+	}
+
+	charger->vbat_chan = iio_channel_get(dev, "CHAN_VBAT");
+	if (IS_ERR(charger->vbat_chan)) {
+		ret = PTR_ERR(charger->vbat_chan);
+		goto vbat_chan_fail;
+	}
+
+	/* Register power supplies */
+	charger->usb = power_supply_register(dev, &usb_desc, NULL);
+	if (IS_ERR(charger->usb)) {
+		ret = PTR_ERR(charger->usb);
+		goto usb_fail;
+	}
+
+	charger->battery = power_supply_register(dev, &battery_desc, NULL);
+	if (IS_ERR(charger->battery)) {
+		ret = PTR_ERR(charger->battery);
+		goto battery_fail;
+	}
+
+	/* Get initial online supply */
+	reg = da9150_reg_read(da9150, DA9150_STATUS_H);
+
+	switch (reg & DA9150_VBUS_STAT_MASK) {
+	case DA9150_VBUS_STAT_OFF:
+	case DA9150_VBUS_STAT_WAIT:
+		charger->supply_online = charger->battery;
+		break;
+	case DA9150_VBUS_STAT_CHG:
+		charger->supply_online = charger->usb;
+		break;
+	default:
+		dev_warn(dev, "Unknown VBUS state - reg = 0x%x\n", reg);
+		charger->supply_online = NULL;
+		break;
+	}
+
+	/* Setup OTG reporting & configuration */
+	charger->usb_phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
+	if (!IS_ERR_OR_NULL(charger->usb_phy)) {
+		INIT_WORK(&charger->otg_work, da9150_charger_otg_work);
+		charger->otg_nb.notifier_call = da9150_charger_otg_ncb;
+		usb_register_notifier(charger->usb_phy, &charger->otg_nb);
+	}
+
+	/* Register IRQs */
+	ret = da9150_charger_register_irq(pdev, da9150_charger_chg_irq,
+					  "CHG_STATUS");
+	if (ret < 0)
+		goto chg_irq_fail;
+
+	ret = da9150_charger_register_irq(pdev, da9150_charger_tjunc_irq,
+					  "CHG_TJUNC");
+	if (ret < 0)
+		goto tjunc_irq_fail;
+
+	ret = da9150_charger_register_irq(pdev, da9150_charger_vfault_irq,
+					  "CHG_VFAULT");
+	if (ret < 0)
+		goto vfault_irq_fail;
+
+	ret = da9150_charger_register_irq(pdev, da9150_charger_vbus_irq,
+					  "CHG_VBUS");
+	if (ret < 0)
+		goto vbus_irq_fail;
+
+	return 0;
+
+
+vbus_irq_fail:
+	da9150_charger_unregister_irq(pdev, "CHG_VFAULT");
+vfault_irq_fail:
+	da9150_charger_unregister_irq(pdev, "CHG_TJUNC");
+tjunc_irq_fail:
+	da9150_charger_unregister_irq(pdev, "CHG_STATUS");
+chg_irq_fail:
+	if (!IS_ERR_OR_NULL(charger->usb_phy))
+		usb_unregister_notifier(charger->usb_phy, &charger->otg_nb);
+battery_fail:
+	power_supply_unregister(charger->usb);
+
+usb_fail:
+	iio_channel_release(charger->vbat_chan);
+
+vbat_chan_fail:
+	iio_channel_release(charger->tjunc_chan);
+
+tjunc_chan_fail:
+	iio_channel_release(charger->vbus_chan);
+
+vbus_chan_fail:
+	iio_channel_release(charger->ibus_chan);
+
+ibus_chan_fail:
+	return ret;
+}
+
+static int da9150_charger_remove(struct platform_device *pdev)
+{
+	struct da9150_charger *charger = platform_get_drvdata(pdev);
+	int irq;
+
+	/* Make sure IRQs are released before unregistering power supplies */
+	irq = platform_get_irq_byname(pdev, "CHG_VBUS");
+	free_irq(irq, charger);
+
+	irq = platform_get_irq_byname(pdev, "CHG_VFAULT");
+	free_irq(irq, charger);
+
+	irq = platform_get_irq_byname(pdev, "CHG_TJUNC");
+	free_irq(irq, charger);
+
+	irq = platform_get_irq_byname(pdev, "CHG_STATUS");
+	free_irq(irq, charger);
+
+	if (!IS_ERR_OR_NULL(charger->usb_phy))
+		usb_unregister_notifier(charger->usb_phy, &charger->otg_nb);
+
+	power_supply_unregister(charger->battery);
+	power_supply_unregister(charger->usb);
+
+	/* Release ADC channels */
+	iio_channel_release(charger->ibus_chan);
+	iio_channel_release(charger->vbus_chan);
+	iio_channel_release(charger->tjunc_chan);
+	iio_channel_release(charger->vbat_chan);
+
+	return 0;
+}
+
+static struct platform_driver da9150_charger_driver = {
+	.driver = {
+		.name = "da9150-charger",
+	},
+	.probe = da9150_charger_probe,
+	.remove = da9150_charger_remove,
+};
+
+module_platform_driver(da9150_charger_driver);
+
+MODULE_DESCRIPTION("Charger Driver for DA9150");
+MODULE_AUTHOR("Adam Thomson <Adam.Thomson.Opensource@diasemi.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 85b4e6e..80f73cc 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -53,7 +53,8 @@
 	int charge_status;		/* POWER_SUPPLY_STATUS_* */
 
 	int full_counter;
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct device *w1_dev;
 	struct workqueue_struct *monitor_wqueue;
 	struct delayed_work monitor_work;
@@ -254,7 +255,7 @@
 	if (di->charge_status == POWER_SUPPLY_STATUS_UNKNOWN)
 		di->full_counter = 0;
 
-	if (power_supply_am_i_supplied(&di->bat)) {
+	if (power_supply_am_i_supplied(di->bat)) {
 		if (di->current_uA > 10000) {
 			di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 			di->full_counter = 0;
@@ -287,7 +288,7 @@
 	}
 
 	if (di->charge_status != old_charge_status)
-		power_supply_changed(&di->bat);
+		power_supply_changed(di->bat);
 }
 
 static void ds2760_battery_write_status(struct ds2760_device_info *di,
@@ -346,12 +347,9 @@
 	queue_delayed_work(di->monitor_wqueue, &di->monitor_work, interval);
 }
 
-#define to_ds2760_device_info(x) container_of((x), struct ds2760_device_info, \
-					      bat);
-
 static void ds2760_battery_external_power_changed(struct power_supply *psy)
 {
-	struct ds2760_device_info *di = to_ds2760_device_info(psy);
+	struct ds2760_device_info *di = power_supply_get_drvdata(psy);
 
 	dev_dbg(di->dev, "%s\n", __func__);
 
@@ -377,7 +375,7 @@
 	 * that error.
 	 */
 
-	if (!power_supply_am_i_supplied(&di->bat))
+	if (!power_supply_am_i_supplied(di->bat))
 		return;
 
 	bias = (signed char) di->current_raw +
@@ -396,7 +394,7 @@
 
 static void ds2760_battery_set_charged(struct power_supply *psy)
 {
-	struct ds2760_device_info *di = to_ds2760_device_info(psy);
+	struct ds2760_device_info *di = power_supply_get_drvdata(psy);
 
 	/* postpone the actual work by 20 secs. This is for debouncing GPIO
 	 * signals and to let the current value settle. See AN4188. */
@@ -407,7 +405,7 @@
 				       enum power_supply_property psp,
 				       union power_supply_propval *val)
 {
-	struct ds2760_device_info *di = to_ds2760_device_info(psy);
+	struct ds2760_device_info *di = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -458,7 +456,7 @@
 				       enum power_supply_property psp,
 				       const union power_supply_propval *val)
 {
-	struct ds2760_device_info *di = to_ds2760_device_info(psy);
+	struct ds2760_device_info *di = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
@@ -508,6 +506,7 @@
 
 static int ds2760_battery_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	char status;
 	int retval = 0;
 	struct ds2760_device_info *di;
@@ -520,20 +519,22 @@
 
 	platform_set_drvdata(pdev, di);
 
-	di->dev			= &pdev->dev;
-	di->w1_dev		= pdev->dev.parent;
-	di->bat.name		= dev_name(&pdev->dev);
-	di->bat.type		= POWER_SUPPLY_TYPE_BATTERY;
-	di->bat.properties	= ds2760_battery_props;
-	di->bat.num_properties	= ARRAY_SIZE(ds2760_battery_props);
-	di->bat.get_property	= ds2760_battery_get_property;
-	di->bat.set_property	= ds2760_battery_set_property;
-	di->bat.property_is_writeable =
+	di->dev				= &pdev->dev;
+	di->w1_dev			= pdev->dev.parent;
+	di->bat_desc.name		= dev_name(&pdev->dev);
+	di->bat_desc.type		= POWER_SUPPLY_TYPE_BATTERY;
+	di->bat_desc.properties		= ds2760_battery_props;
+	di->bat_desc.num_properties	= ARRAY_SIZE(ds2760_battery_props);
+	di->bat_desc.get_property	= ds2760_battery_get_property;
+	di->bat_desc.set_property	= ds2760_battery_set_property;
+	di->bat_desc.property_is_writeable =
 				  ds2760_battery_property_is_writeable;
-	di->bat.set_charged	= ds2760_battery_set_charged;
-	di->bat.external_power_changed =
+	di->bat_desc.set_charged	= ds2760_battery_set_charged;
+	di->bat_desc.external_power_changed =
 				  ds2760_battery_external_power_changed;
 
+	psy_cfg.drv_data		= di;
+
 	di->charge_status = POWER_SUPPLY_STATUS_UNKNOWN;
 
 	/* enable sleep mode feature */
@@ -555,9 +556,10 @@
 	if (current_accum)
 		ds2760_battery_set_current_accum(di, current_accum);
 
-	retval = power_supply_register(&pdev->dev, &di->bat);
-	if (retval) {
+	di->bat = power_supply_register(&pdev->dev, &di->bat_desc, &psy_cfg);
+	if (IS_ERR(di->bat)) {
 		dev_err(di->dev, "failed to register battery\n");
+		retval = PTR_ERR(di->bat);
 		goto batt_failed;
 	}
 
@@ -574,7 +576,7 @@
 	goto success;
 
 workqueue_failed:
-	power_supply_unregister(&di->bat);
+	power_supply_unregister(di->bat);
 batt_failed:
 di_alloc_failed:
 success:
@@ -588,7 +590,7 @@
 	cancel_delayed_work_sync(&di->monitor_work);
 	cancel_delayed_work_sync(&di->set_charged_work);
 	destroy_workqueue(di->monitor_wqueue);
-	power_supply_unregister(&di->bat);
+	power_supply_unregister(di->bat);
 
 	return 0;
 }
@@ -610,7 +612,7 @@
 	struct ds2760_device_info *di = platform_get_drvdata(pdev);
 
 	di->charge_status = POWER_SUPPLY_STATUS_UNKNOWN;
-	power_supply_changed(&di->bat);
+	power_supply_changed(di->bat);
 
 	mod_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ);
 
diff --git a/drivers/power/ds2780_battery.c b/drivers/power/ds2780_battery.c
index 9f418fa..a7a0427 100644
--- a/drivers/power/ds2780_battery.c
+++ b/drivers/power/ds2780_battery.c
@@ -37,7 +37,8 @@
 
 struct ds2780_device_info {
 	struct device *dev;
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct device *w1_dev;
 };
 
@@ -52,7 +53,7 @@
 static inline struct ds2780_device_info *
 to_ds2780_device_info(struct power_supply *psy)
 {
-	return container_of(psy, struct ds2780_device_info, bat);
+	return power_supply_get_drvdata(psy);
 }
 
 static inline struct power_supply *to_power_supply(struct device *dev)
@@ -757,6 +758,7 @@
 
 static int ds2780_battery_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	int ret = 0;
 	struct ds2780_device_info *dev_info;
 
@@ -770,25 +772,29 @@
 
 	dev_info->dev			= &pdev->dev;
 	dev_info->w1_dev		= pdev->dev.parent;
-	dev_info->bat.name		= dev_name(&pdev->dev);
-	dev_info->bat.type		= POWER_SUPPLY_TYPE_BATTERY;
-	dev_info->bat.properties	= ds2780_battery_props;
-	dev_info->bat.num_properties	= ARRAY_SIZE(ds2780_battery_props);
-	dev_info->bat.get_property	= ds2780_battery_get_property;
+	dev_info->bat_desc.name		= dev_name(&pdev->dev);
+	dev_info->bat_desc.type		= POWER_SUPPLY_TYPE_BATTERY;
+	dev_info->bat_desc.properties	= ds2780_battery_props;
+	dev_info->bat_desc.num_properties = ARRAY_SIZE(ds2780_battery_props);
+	dev_info->bat_desc.get_property	= ds2780_battery_get_property;
 
-	ret = power_supply_register(&pdev->dev, &dev_info->bat);
-	if (ret) {
+	psy_cfg.drv_data		= dev_info;
+
+	dev_info->bat = power_supply_register(&pdev->dev, &dev_info->bat_desc,
+					      &psy_cfg);
+	if (IS_ERR(dev_info->bat)) {
 		dev_err(dev_info->dev, "failed to register battery\n");
+		ret = PTR_ERR(dev_info->bat);
 		goto fail;
 	}
 
-	ret = sysfs_create_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
+	ret = sysfs_create_group(&dev_info->bat->dev.kobj, &ds2780_attr_group);
 	if (ret) {
 		dev_err(dev_info->dev, "failed to create sysfs group\n");
 		goto fail_unregister;
 	}
 
-	ret = sysfs_create_bin_file(&dev_info->bat.dev->kobj,
+	ret = sysfs_create_bin_file(&dev_info->bat->dev.kobj,
 					&ds2780_param_eeprom_bin_attr);
 	if (ret) {
 		dev_err(dev_info->dev,
@@ -796,7 +802,7 @@
 		goto fail_remove_group;
 	}
 
-	ret = sysfs_create_bin_file(&dev_info->bat.dev->kobj,
+	ret = sysfs_create_bin_file(&dev_info->bat->dev.kobj,
 					&ds2780_user_eeprom_bin_attr);
 	if (ret) {
 		dev_err(dev_info->dev,
@@ -807,12 +813,12 @@
 	return 0;
 
 fail_remove_bin_file:
-	sysfs_remove_bin_file(&dev_info->bat.dev->kobj,
+	sysfs_remove_bin_file(&dev_info->bat->dev.kobj,
 				&ds2780_param_eeprom_bin_attr);
 fail_remove_group:
-	sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
+	sysfs_remove_group(&dev_info->bat->dev.kobj, &ds2780_attr_group);
 fail_unregister:
-	power_supply_unregister(&dev_info->bat);
+	power_supply_unregister(dev_info->bat);
 fail:
 	return ret;
 }
@@ -821,10 +827,13 @@
 {
 	struct ds2780_device_info *dev_info = platform_get_drvdata(pdev);
 
-	/* remove attributes */
-	sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
+	/*
+	 * Remove attributes before unregistering power supply
+	 * because 'bat' will be freed on power_supply_unregister() call.
+	 */
+	sysfs_remove_group(&dev_info->bat->dev.kobj, &ds2780_attr_group);
 
-	power_supply_unregister(&dev_info->bat);
+	power_supply_unregister(dev_info->bat);
 
 	return 0;
 }
diff --git a/drivers/power/ds2781_battery.c b/drivers/power/ds2781_battery.c
index 0a5acc6..56d583d 100644
--- a/drivers/power/ds2781_battery.c
+++ b/drivers/power/ds2781_battery.c
@@ -35,7 +35,8 @@
 
 struct ds2781_device_info {
 	struct device *dev;
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct device *w1_dev;
 };
 
@@ -50,7 +51,7 @@
 static inline struct ds2781_device_info *
 to_ds2781_device_info(struct power_supply *psy)
 {
-	return container_of(psy, struct ds2781_device_info, bat);
+	return power_supply_get_drvdata(psy);
 }
 
 static inline struct power_supply *to_power_supply(struct device *dev)
@@ -328,7 +329,7 @@
 	if (ret < 0)
 		return ret;
 
-	if (power_supply_am_i_supplied(&dev_info->bat)) {
+	if (power_supply_am_i_supplied(dev_info->bat)) {
 		if (capacity == 100)
 			*status = POWER_SUPPLY_STATUS_FULL;
 		else if (current_uA > 50000)
@@ -752,6 +753,7 @@
 
 static int ds2781_battery_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	int ret = 0;
 	struct ds2781_device_info *dev_info;
 
@@ -763,25 +765,29 @@
 
 	dev_info->dev			= &pdev->dev;
 	dev_info->w1_dev		= pdev->dev.parent;
-	dev_info->bat.name		= dev_name(&pdev->dev);
-	dev_info->bat.type		= POWER_SUPPLY_TYPE_BATTERY;
-	dev_info->bat.properties	= ds2781_battery_props;
-	dev_info->bat.num_properties	= ARRAY_SIZE(ds2781_battery_props);
-	dev_info->bat.get_property	= ds2781_battery_get_property;
+	dev_info->bat_desc.name		= dev_name(&pdev->dev);
+	dev_info->bat_desc.type		= POWER_SUPPLY_TYPE_BATTERY;
+	dev_info->bat_desc.properties	= ds2781_battery_props;
+	dev_info->bat_desc.num_properties = ARRAY_SIZE(ds2781_battery_props);
+	dev_info->bat_desc.get_property	= ds2781_battery_get_property;
 
-	ret = power_supply_register(&pdev->dev, &dev_info->bat);
-	if (ret) {
+	psy_cfg.drv_data		= dev_info;
+
+	dev_info->bat = power_supply_register(&pdev->dev, &dev_info->bat_desc,
+						&psy_cfg);
+	if (IS_ERR(dev_info->bat)) {
 		dev_err(dev_info->dev, "failed to register battery\n");
+		ret = PTR_ERR(dev_info->bat);
 		goto fail;
 	}
 
-	ret = sysfs_create_group(&dev_info->bat.dev->kobj, &ds2781_attr_group);
+	ret = sysfs_create_group(&dev_info->bat->dev.kobj, &ds2781_attr_group);
 	if (ret) {
 		dev_err(dev_info->dev, "failed to create sysfs group\n");
 		goto fail_unregister;
 	}
 
-	ret = sysfs_create_bin_file(&dev_info->bat.dev->kobj,
+	ret = sysfs_create_bin_file(&dev_info->bat->dev.kobj,
 					&ds2781_param_eeprom_bin_attr);
 	if (ret) {
 		dev_err(dev_info->dev,
@@ -789,7 +795,7 @@
 		goto fail_remove_group;
 	}
 
-	ret = sysfs_create_bin_file(&dev_info->bat.dev->kobj,
+	ret = sysfs_create_bin_file(&dev_info->bat->dev.kobj,
 					&ds2781_user_eeprom_bin_attr);
 	if (ret) {
 		dev_err(dev_info->dev,
@@ -800,12 +806,12 @@
 	return 0;
 
 fail_remove_bin_file:
-	sysfs_remove_bin_file(&dev_info->bat.dev->kobj,
+	sysfs_remove_bin_file(&dev_info->bat->dev.kobj,
 				&ds2781_param_eeprom_bin_attr);
 fail_remove_group:
-	sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2781_attr_group);
+	sysfs_remove_group(&dev_info->bat->dev.kobj, &ds2781_attr_group);
 fail_unregister:
-	power_supply_unregister(&dev_info->bat);
+	power_supply_unregister(dev_info->bat);
 fail:
 	return ret;
 }
@@ -814,10 +820,13 @@
 {
 	struct ds2781_device_info *dev_info = platform_get_drvdata(pdev);
 
-	/* remove attributes */
-	sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2781_attr_group);
+	/*
+	 * Remove attributes before unregistering power supply
+	 * because 'bat' will be freed on power_supply_unregister() call.
+	 */
+	sysfs_remove_group(&dev_info->bat->dev.kobj, &ds2781_attr_group);
 
-	power_supply_unregister(&dev_info->bat);
+	power_supply_unregister(dev_info->bat);
 
 	return 0;
 }
diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c
index 3969488..ed4d756 100644
--- a/drivers/power/ds2782_battery.c
+++ b/drivers/power/ds2782_battery.c
@@ -53,11 +53,12 @@
 	int (*get_battery_capacity)(struct ds278x_info *info, int *capacity);
 };
 
-#define to_ds278x_info(x) container_of(x, struct ds278x_info, battery)
+#define to_ds278x_info(x) power_supply_get_drvdata(x)
 
 struct ds278x_info {
 	struct i2c_client	*client;
-	struct power_supply	battery;
+	struct power_supply	*battery;
+	struct power_supply_desc	battery_desc;
 	struct ds278x_battery_ops  *ops;
 	struct delayed_work	bat_work;
 	int			id;
@@ -285,7 +286,7 @@
 	ds278x_get_status(info, &info->status);
 
 	if ((old_status != info->status) || (old_capacity != info->capacity))
-		power_supply_changed(&info->battery);
+		power_supply_changed(info->battery);
 }
 
 static void ds278x_bat_work(struct work_struct *work)
@@ -306,7 +307,7 @@
 	POWER_SUPPLY_PROP_TEMP,
 };
 
-static void ds278x_power_supply_init(struct power_supply *battery)
+static void ds278x_power_supply_init(struct power_supply_desc *battery)
 {
 	battery->type			= POWER_SUPPLY_TYPE_BATTERY;
 	battery->properties		= ds278x_battery_props;
@@ -319,8 +320,8 @@
 {
 	struct ds278x_info *info = i2c_get_clientdata(client);
 
-	power_supply_unregister(&info->battery);
-	kfree(info->battery.name);
+	power_supply_unregister(info->battery);
+	kfree(info->battery_desc.name);
 
 	mutex_lock(&battery_lock);
 	idr_remove(&battery_id, info->id);
@@ -377,6 +378,7 @@
 				const struct i2c_device_id *id)
 {
 	struct ds278x_platform_data *pdata = client->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct ds278x_info *info;
 	int ret;
 	int num;
@@ -404,8 +406,9 @@
 		goto fail_info;
 	}
 
-	info->battery.name = kasprintf(GFP_KERNEL, "%s-%d", client->name, num);
-	if (!info->battery.name) {
+	info->battery_desc.name = kasprintf(GFP_KERNEL, "%s-%d",
+					    client->name, num);
+	if (!info->battery_desc.name) {
 		ret = -ENOMEM;
 		goto fail_name;
 	}
@@ -417,16 +420,19 @@
 	info->client = client;
 	info->id = num;
 	info->ops  = &ds278x_ops[id->driver_data];
-	ds278x_power_supply_init(&info->battery);
+	ds278x_power_supply_init(&info->battery_desc);
+	psy_cfg.drv_data = info;
 
 	info->capacity = 100;
 	info->status = POWER_SUPPLY_STATUS_FULL;
 
 	INIT_DELAYED_WORK(&info->bat_work, ds278x_bat_work);
 
-	ret = power_supply_register(&client->dev, &info->battery);
-	if (ret) {
+	info->battery = power_supply_register(&client->dev,
+					      &info->battery_desc, &psy_cfg);
+	if (IS_ERR(info->battery)) {
 		dev_err(&client->dev, "failed to register battery\n");
+		ret = PTR_ERR(info->battery);
 		goto fail_register;
 	} else {
 		schedule_delayed_work(&info->bat_work, DS278x_DELAY);
@@ -435,7 +441,7 @@
 	return 0;
 
 fail_register:
-	kfree(info->battery.name);
+	kfree(info->battery_desc.name);
 fail_name:
 	kfree(info);
 fail_info:
diff --git a/drivers/power/generic-adc-battery.c b/drivers/power/generic-adc-battery.c
index d72733e..fedc581 100644
--- a/drivers/power/generic-adc-battery.c
+++ b/drivers/power/generic-adc-battery.c
@@ -44,7 +44,8 @@
 };
 
 struct gab {
-	struct power_supply	psy;
+	struct power_supply		*psy;
+	struct power_supply_desc	psy_desc;
 	struct iio_channel	*channel[GAB_MAX_CHAN_TYPE];
 	struct gab_platform_data	*pdata;
 	struct delayed_work bat_work;
@@ -55,7 +56,7 @@
 
 static struct gab *to_generic_bat(struct power_supply *psy)
 {
-	return container_of(psy, struct gab, psy);
+	return power_supply_get_drvdata(psy);
 }
 
 static void gab_ext_power_changed(struct power_supply *psy)
@@ -151,7 +152,7 @@
 
 	adc_bat = to_generic_bat(psy);
 	if (!adc_bat) {
-		dev_err(psy->dev, "no battery infos ?!\n");
+		dev_err(&psy->dev, "no battery infos ?!\n");
 		return -EINVAL;
 	}
 	pdata = adc_bat->pdata;
@@ -159,7 +160,7 @@
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
-		gab_get_status(adc_bat);
+		val->intval = gab_get_status(adc_bat);
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN:
 		val->intval = 0;
@@ -210,7 +211,7 @@
 	pdata = adc_bat->pdata;
 	status = adc_bat->status;
 
-	is_plugged = power_supply_am_i_supplied(&adc_bat->psy);
+	is_plugged = power_supply_am_i_supplied(adc_bat->psy);
 	adc_bat->cable_plugged = is_plugged;
 
 	if (!is_plugged)
@@ -221,7 +222,7 @@
 		adc_bat->status = POWER_SUPPLY_STATUS_CHARGING;
 
 	if (status != adc_bat->status)
-		power_supply_changed(&adc_bat->psy);
+		power_supply_changed(adc_bat->psy);
 }
 
 static irqreturn_t gab_charged(int irq, void *dev_id)
@@ -239,7 +240,8 @@
 static int gab_probe(struct platform_device *pdev)
 {
 	struct gab *adc_bat;
-	struct power_supply *psy;
+	struct power_supply_desc *psy_desc;
+	struct power_supply_config psy_cfg = {};
 	struct gab_platform_data *pdata = pdev->dev.platform_data;
 	enum power_supply_property *properties;
 	int ret = 0;
@@ -252,32 +254,34 @@
 		return -ENOMEM;
 	}
 
-	psy = &adc_bat->psy;
-	psy->name = pdata->battery_info.name;
+	psy_cfg.drv_data = adc_bat;
+	psy_desc = &adc_bat->psy_desc;
+	psy_desc->name = pdata->battery_info.name;
 
 	/* bootup default values for the battery */
 	adc_bat->cable_plugged = false;
 	adc_bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
-	psy->type = POWER_SUPPLY_TYPE_BATTERY;
-	psy->get_property = gab_get_property;
-	psy->external_power_changed = gab_ext_power_changed;
+	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+	psy_desc->get_property = gab_get_property;
+	psy_desc->external_power_changed = gab_ext_power_changed;
 	adc_bat->pdata = pdata;
 
 	/*
 	 * copying the static properties and allocating extra memory for holding
 	 * the extra configurable properties received from platform data.
 	 */
-	psy->properties = kcalloc(ARRAY_SIZE(gab_props) +
+	psy_desc->properties = kcalloc(ARRAY_SIZE(gab_props) +
 					ARRAY_SIZE(gab_chan_name),
-					sizeof(*psy->properties), GFP_KERNEL);
-	if (!psy->properties) {
+					sizeof(*psy_desc->properties),
+					GFP_KERNEL);
+	if (!psy_desc->properties) {
 		ret = -ENOMEM;
 		goto first_mem_fail;
 	}
 
-	memcpy(psy->properties, gab_props, sizeof(gab_props));
+	memcpy(psy_desc->properties, gab_props, sizeof(gab_props));
 	properties = (enum power_supply_property *)
-				((char *)psy->properties + sizeof(gab_props));
+			((char *)psy_desc->properties + sizeof(gab_props));
 
 	/*
 	 * getting channel from iio and copying the battery properties
@@ -291,7 +295,7 @@
 			adc_bat->channel[chan] = NULL;
 		} else {
 			/* copying properties for supported channels only */
-			memcpy(properties + sizeof(*(psy->properties)) * index,
+			memcpy(properties + sizeof(*(psy_desc->properties)) * index,
 					&gab_dyn_props[chan],
 					sizeof(gab_dyn_props[chan]));
 			index++;
@@ -310,11 +314,13 @@
 	 * as come channels may be not be supported by the device.So
 	 * we need to take care of that.
 	 */
-	psy->num_properties = ARRAY_SIZE(gab_props) + index;
+	psy_desc->num_properties = ARRAY_SIZE(gab_props) + index;
 
-	ret = power_supply_register(&pdev->dev, psy);
-	if (ret)
+	adc_bat->psy = power_supply_register(&pdev->dev, psy_desc, &psy_cfg);
+	if (IS_ERR(adc_bat->psy)) {
+		ret = PTR_ERR(adc_bat->psy);
 		goto err_reg_fail;
+	}
 
 	INIT_DELAYED_WORK(&adc_bat->bat_work, gab_work);
 
@@ -342,14 +348,14 @@
 err_gpio:
 	gpio_free(pdata->gpio_charge_finished);
 gpio_req_fail:
-	power_supply_unregister(psy);
+	power_supply_unregister(adc_bat->psy);
 err_reg_fail:
 	for (chan = 0; chan < ARRAY_SIZE(gab_chan_name); chan++) {
 		if (adc_bat->channel[chan])
 			iio_channel_release(adc_bat->channel[chan]);
 	}
 second_mem_fail:
-	kfree(psy->properties);
+	kfree(psy_desc->properties);
 first_mem_fail:
 	return ret;
 }
@@ -360,7 +366,7 @@
 	struct gab *adc_bat = platform_get_drvdata(pdev);
 	struct gab_platform_data *pdata = adc_bat->pdata;
 
-	power_supply_unregister(&adc_bat->psy);
+	power_supply_unregister(adc_bat->psy);
 
 	if (gpio_is_valid(pdata->gpio_charge_finished)) {
 		free_irq(gpio_to_irq(pdata->gpio_charge_finished), adc_bat);
@@ -372,7 +378,7 @@
 			iio_channel_release(adc_bat->channel[chan]);
 	}
 
-	kfree(adc_bat->psy.properties);
+	kfree(adc_bat->psy_desc.properties);
 	cancel_delayed_work(&adc_bat->bat_work);
 	return 0;
 }
diff --git a/drivers/power/goldfish_battery.c b/drivers/power/goldfish_battery.c
index 29eba88..a50bb98 100644
--- a/drivers/power/goldfish_battery.c
+++ b/drivers/power/goldfish_battery.c
@@ -30,8 +30,8 @@
 	int irq;
 	spinlock_t lock;
 
-	struct power_supply battery;
-	struct power_supply ac;
+	struct power_supply *battery;
+	struct power_supply *ac;
 };
 
 #define GOLDFISH_BATTERY_READ(data, addr) \
@@ -67,8 +67,7 @@
 			enum power_supply_property psp,
 			union power_supply_propval *val)
 {
-	struct goldfish_battery_data *data = container_of(psy,
-		struct goldfish_battery_data, ac);
+	struct goldfish_battery_data *data = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
@@ -86,8 +85,7 @@
 				 enum power_supply_property psp,
 				 union power_supply_propval *val)
 {
-	struct goldfish_battery_data *data = container_of(psy,
-		struct goldfish_battery_data, battery);
+	struct goldfish_battery_data *data = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
@@ -139,20 +137,36 @@
 	status &= BATTERY_INT_MASK;
 
 	if (status & BATTERY_STATUS_CHANGED)
-		power_supply_changed(&data->battery);
+		power_supply_changed(data->battery);
 	if (status & AC_STATUS_CHANGED)
-		power_supply_changed(&data->ac);
+		power_supply_changed(data->ac);
 
 	spin_unlock_irqrestore(&data->lock, irq_flags);
 	return status ? IRQ_HANDLED : IRQ_NONE;
 }
 
+static const struct power_supply_desc battery_desc = {
+	.properties	= goldfish_battery_props,
+	.num_properties	= ARRAY_SIZE(goldfish_battery_props),
+	.get_property	= goldfish_battery_get_property,
+	.name		= "battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+};
+
+static const struct power_supply_desc ac_desc = {
+	.properties	= goldfish_ac_props,
+	.num_properties	= ARRAY_SIZE(goldfish_ac_props),
+	.get_property	= goldfish_ac_get_property,
+	.name		= "ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+};
 
 static int goldfish_battery_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct resource *r;
 	struct goldfish_battery_data *data;
+	struct power_supply_config psy_cfg = {};
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (data == NULL)
@@ -160,18 +174,6 @@
 
 	spin_lock_init(&data->lock);
 
-	data->battery.properties = goldfish_battery_props;
-	data->battery.num_properties = ARRAY_SIZE(goldfish_battery_props);
-	data->battery.get_property = goldfish_battery_get_property;
-	data->battery.name = "battery";
-	data->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-
-	data->ac.properties = goldfish_ac_props;
-	data->ac.num_properties = ARRAY_SIZE(goldfish_ac_props);
-	data->ac.get_property = goldfish_ac_get_property;
-	data->ac.name = "ac";
-	data->ac.type = POWER_SUPPLY_TYPE_MAINS;
-
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		dev_err(&pdev->dev, "platform_get_resource failed\n");
@@ -195,14 +197,17 @@
 	if (ret)
 		return ret;
 
-	ret = power_supply_register(&pdev->dev, &data->ac);
-	if (ret)
-		return ret;
+	psy_cfg.drv_data = data;
 
-	ret = power_supply_register(&pdev->dev, &data->battery);
-	if (ret) {
-		power_supply_unregister(&data->ac);
-		return ret;
+	data->ac = power_supply_register(&pdev->dev, &ac_desc, &psy_cfg);
+	if (IS_ERR(data->ac))
+		return PTR_ERR(data->ac);
+
+	data->battery = power_supply_register(&pdev->dev, &battery_desc,
+						&psy_cfg);
+	if (IS_ERR(data->battery)) {
+		power_supply_unregister(data->ac);
+		return PTR_ERR(data->battery);
 	}
 
 	platform_set_drvdata(pdev, data);
@@ -216,8 +221,8 @@
 {
 	struct goldfish_battery_data *data = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&data->battery);
-	power_supply_unregister(&data->ac);
+	power_supply_unregister(data->battery);
+	power_supply_unregister(data->ac);
 	battery_data = NULL;
 	return 0;
 }
diff --git a/drivers/power/gpio-charger.c b/drivers/power/gpio-charger.c
index b7424c8..c5869b1 100644
--- a/drivers/power/gpio-charger.c
+++ b/drivers/power/gpio-charger.c
@@ -32,7 +32,8 @@
 	unsigned int irq;
 	bool wakeup_enabled;
 
-	struct power_supply charger;
+	struct power_supply *charger;
+	struct power_supply_desc charger_desc;
 };
 
 static irqreturn_t gpio_charger_irq(int irq, void *devid)
@@ -46,7 +47,7 @@
 
 static inline struct gpio_charger *psy_to_gpio_charger(struct power_supply *psy)
 {
-	return container_of(psy, struct gpio_charger, charger);
+	return power_supply_get_drvdata(psy);
 }
 
 static int gpio_charger_get_property(struct power_supply *psy,
@@ -127,8 +128,9 @@
 static int gpio_charger_probe(struct platform_device *pdev)
 {
 	const struct gpio_charger_platform_data *pdata = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct gpio_charger *gpio_charger;
-	struct power_supply *charger;
+	struct power_supply_desc *charger_desc;
 	int ret;
 	int irq;
 
@@ -154,16 +156,18 @@
 		return -ENOMEM;
 	}
 
-	charger = &gpio_charger->charger;
+	charger_desc = &gpio_charger->charger_desc;
 
-	charger->name = pdata->name ? pdata->name : "gpio-charger";
-	charger->type = pdata->type;
-	charger->properties = gpio_charger_properties;
-	charger->num_properties = ARRAY_SIZE(gpio_charger_properties);
-	charger->get_property = gpio_charger_get_property;
-	charger->supplied_to = pdata->supplied_to;
-	charger->num_supplicants = pdata->num_supplicants;
-	charger->of_node = pdev->dev.of_node;
+	charger_desc->name = pdata->name ? pdata->name : "gpio-charger";
+	charger_desc->type = pdata->type;
+	charger_desc->properties = gpio_charger_properties;
+	charger_desc->num_properties = ARRAY_SIZE(gpio_charger_properties);
+	charger_desc->get_property = gpio_charger_get_property;
+
+	psy_cfg.supplied_to = pdata->supplied_to;
+	psy_cfg.num_supplicants = pdata->num_supplicants;
+	psy_cfg.of_node = pdev->dev.of_node;
+	psy_cfg.drv_data = gpio_charger;
 
 	ret = gpio_request(pdata->gpio, dev_name(&pdev->dev));
 	if (ret) {
@@ -178,8 +182,10 @@
 
 	gpio_charger->pdata = pdata;
 
-	ret = power_supply_register(&pdev->dev, charger);
-	if (ret < 0) {
+	gpio_charger->charger = power_supply_register(&pdev->dev,
+						      charger_desc, &psy_cfg);
+	if (IS_ERR(gpio_charger->charger)) {
+		ret = PTR_ERR(gpio_charger->charger);
 		dev_err(&pdev->dev, "Failed to register power supply: %d\n",
 			ret);
 		goto err_gpio_free;
@@ -189,7 +195,7 @@
 	if (irq > 0) {
 		ret = request_any_context_irq(irq, gpio_charger_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-				dev_name(&pdev->dev), charger);
+				dev_name(&pdev->dev), gpio_charger->charger);
 		if (ret < 0)
 			dev_warn(&pdev->dev, "Failed to request irq: %d\n", ret);
 		else
@@ -213,9 +219,9 @@
 	struct gpio_charger *gpio_charger = platform_get_drvdata(pdev);
 
 	if (gpio_charger->irq)
-		free_irq(gpio_charger->irq, &gpio_charger->charger);
+		free_irq(gpio_charger->irq, gpio_charger->charger);
 
-	power_supply_unregister(&gpio_charger->charger);
+	power_supply_unregister(gpio_charger->charger);
 
 	gpio_free(gpio_charger->pdata->gpio);
 
@@ -241,7 +247,7 @@
 
 	if (device_may_wakeup(dev) && gpio_charger->wakeup_enabled)
 		disable_irq_wake(gpio_charger->irq);
-	power_supply_changed(&gpio_charger->charger);
+	power_supply_changed(gpio_charger->charger);
 
 	return 0;
 }
diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c
index de3f39e..9fa4acc 100644
--- a/drivers/power/intel_mid_battery.c
+++ b/drivers/power/intel_mid_battery.c
@@ -107,8 +107,8 @@
 	unsigned int batt_prev_charge_full;	/* in mAS */
 	unsigned int batt_charge_rate;		/* in units per second */
 
-	struct power_supply usb;
-	struct power_supply batt;
+	struct power_supply *usb;
+	struct power_supply *batt;
 	int irq;				/* GPE_ID or IRQ# */
 	struct workqueue_struct *monitor_wqueue;
 	struct delayed_work monitor_battery;
@@ -404,8 +404,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct pmic_power_module_info *pbi = container_of(psy,
-				struct pmic_power_module_info, usb);
+	struct pmic_power_module_info *pbi = power_supply_get_drvdata(psy);
 
 	/* update pmic_power_module_info members */
 	pmic_battery_read_status(pbi);
@@ -444,8 +443,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct pmic_power_module_info *pbi = container_of(psy,
-				struct pmic_power_module_info, batt);
+	struct pmic_power_module_info *pbi = power_supply_get_drvdata(psy);
 
 	/* update pmic_power_module_info members */
 	pmic_battery_read_status(pbi);
@@ -640,6 +638,25 @@
 			__func__);
 }
 
+/*
+ * Description of power supplies
+ */
+static const struct power_supply_desc pmic_usb_desc = {
+	.name		= "pmic-usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= pmic_usb_props,
+	.num_properties	= ARRAY_SIZE(pmic_usb_props),
+	.get_property	= pmic_usb_get_property,
+};
+
+static const struct power_supply_desc pmic_batt_desc = {
+	.name		= "pmic-batt",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= pmic_battery_props,
+	.num_properties	= ARRAY_SIZE(pmic_battery_props),
+	.get_property	= pmic_battery_get_property,
+};
+
 /**
  * pmic_battery_probe - pmic battery initialize
  * @irq: pmic battery device irq
@@ -653,6 +670,7 @@
 {
 	int retval = 0;
 	struct pmic_power_module_info *pbi;
+	struct power_supply_config psy_cfg = {};
 
 	dev_dbg(dev, "pmic-battery: found pmic battery device\n");
 
@@ -666,6 +684,7 @@
 	pbi->dev = dev;
 	pbi->irq = irq;
 	dev_set_drvdata(dev, pbi);
+	psy_cfg.drv_data = pbi;
 
 	/* initialize all required framework before enabling interrupts */
 	INIT_WORK(&pbi->handler, pmic_battery_handle_intrpt);
@@ -687,16 +706,12 @@
 	}
 
 	/* register pmic-batt with power supply subsystem */
-	pbi->batt.name = "pmic-batt";
-	pbi->batt.type = POWER_SUPPLY_TYPE_BATTERY;
-	pbi->batt.properties = pmic_battery_props;
-	pbi->batt.num_properties = ARRAY_SIZE(pmic_battery_props);
-	pbi->batt.get_property = pmic_battery_get_property;
-	retval = power_supply_register(dev, &pbi->batt);
-	if (retval) {
+	pbi->batt = power_supply_register(dev, &pmic_usb_desc, &psy_cfg);
+	if (IS_ERR(pbi->batt)) {
 		dev_err(dev,
 			"%s(): failed to register pmic battery device with power supply subsystem\n",
 				__func__);
+		retval = PTR_ERR(pbi->batt);
 		goto power_reg_failed;
 	}
 
@@ -707,16 +722,12 @@
 	queue_delayed_work(pbi->monitor_wqueue, &pbi->monitor_battery, HZ * 1);
 
 	/* register pmic-usb with power supply subsystem */
-	pbi->usb.name = "pmic-usb";
-	pbi->usb.type = POWER_SUPPLY_TYPE_USB;
-	pbi->usb.properties = pmic_usb_props;
-	pbi->usb.num_properties = ARRAY_SIZE(pmic_usb_props);
-	pbi->usb.get_property = pmic_usb_get_property;
-	retval = power_supply_register(dev, &pbi->usb);
-	if (retval) {
+	pbi->usb = power_supply_register(dev, &pmic_batt_desc, &psy_cfg);
+	if (IS_ERR(pbi->usb)) {
 		dev_err(dev,
 			"%s(): failed to register pmic usb device with power supply subsystem\n",
 				__func__);
+		retval = PTR_ERR(pbi->usb);
 		goto power_reg_failed_1;
 	}
 
@@ -728,7 +739,7 @@
 	return retval;
 
 power_reg_failed_1:
-	power_supply_unregister(&pbi->batt);
+	power_supply_unregister(pbi->batt);
 power_reg_failed:
 	cancel_delayed_work_sync(&pbi->monitor_battery);
 requestirq_failed:
@@ -762,8 +773,8 @@
 	cancel_delayed_work_sync(&pbi->monitor_battery);
 	destroy_workqueue(pbi->monitor_wqueue);
 
-	power_supply_unregister(&pbi->usb);
-	power_supply_unregister(&pbi->batt);
+	power_supply_unregister(pbi->usb);
+	power_supply_unregister(pbi->batt);
 
 	cancel_work_sync(&pbi->handler);
 	kfree(pbi);
diff --git a/drivers/power/ipaq_micro_battery.c b/drivers/power/ipaq_micro_battery.c
index 9d69460..f03014e 100644
--- a/drivers/power/ipaq_micro_battery.c
+++ b/drivers/power/ipaq_micro_battery.c
@@ -93,7 +93,7 @@
 
 static int get_capacity(struct power_supply *b)
 {
-	struct micro_battery *mb = dev_get_drvdata(b->dev->parent);
+	struct micro_battery *mb = dev_get_drvdata(b->dev.parent);
 
 	switch (mb->flag & 0x07) {
 	case MICRO_BATT_STATUS_HIGH:
@@ -113,7 +113,7 @@
 
 static int get_status(struct power_supply *b)
 {
-	struct micro_battery *mb = dev_get_drvdata(b->dev->parent);
+	struct micro_battery *mb = dev_get_drvdata(b->dev.parent);
 
 	if (mb->flag == MICRO_BATT_STATUS_UNKNOWN)
 		return POWER_SUPPLY_STATUS_UNKNOWN;
@@ -132,7 +132,7 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct micro_battery *mb = dev_get_drvdata(b->dev->parent);
+	struct micro_battery *mb = dev_get_drvdata(b->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
@@ -180,7 +180,7 @@
 				 enum power_supply_property psp,
 				 union power_supply_propval *val)
 {
-	struct micro_battery *mb = dev_get_drvdata(b->dev->parent);
+	struct micro_battery *mb = dev_get_drvdata(b->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ONLINE:
@@ -202,7 +202,7 @@
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 };
 
-static struct power_supply micro_batt_power = {
+static const struct power_supply_desc micro_batt_power_desc = {
 	.name			= "main-battery",
 	.type			= POWER_SUPPLY_TYPE_BATTERY,
 	.properties		= micro_batt_power_props,
@@ -215,7 +215,7 @@
 	POWER_SUPPLY_PROP_ONLINE,
 };
 
-static struct power_supply micro_ac_power = {
+static const struct power_supply_desc micro_ac_power_desc = {
 	.name			= "ac",
 	.type			= POWER_SUPPLY_TYPE_MAINS,
 	.properties		= micro_ac_power_props,
@@ -223,9 +223,12 @@
 	.get_property		= micro_ac_get_property,
 };
 
+static struct power_supply *micro_batt_power, *micro_ac_power;
+
 static int micro_batt_probe(struct platform_device *pdev)
 {
 	struct micro_battery *mb;
+	int ret;
 
 	mb = devm_kzalloc(&pdev->dev, sizeof(*mb), GFP_KERNEL);
 	if (!mb)
@@ -233,14 +236,36 @@
 
 	mb->micro = dev_get_drvdata(pdev->dev.parent);
 	mb->wq = create_singlethread_workqueue("ipaq-battery-wq");
+	if (!mb->wq)
+		return -ENOMEM;
+
 	INIT_DELAYED_WORK(&mb->update, micro_battery_work);
 	platform_set_drvdata(pdev, mb);
 	queue_delayed_work(mb->wq, &mb->update, 1);
-	power_supply_register(&pdev->dev, &micro_batt_power);
-	power_supply_register(&pdev->dev, &micro_ac_power);
+
+	micro_batt_power = power_supply_register(&pdev->dev,
+						 &micro_batt_power_desc, NULL);
+	if (IS_ERR(micro_batt_power)) {
+		ret = PTR_ERR(micro_batt_power);
+		goto batt_err;
+	}
+
+	micro_ac_power = power_supply_register(&pdev->dev,
+					       &micro_ac_power_desc, NULL);
+	if (IS_ERR(micro_ac_power)) {
+		ret = PTR_ERR(micro_ac_power);
+		goto ac_err;
+	}
 
 	dev_info(&pdev->dev, "iPAQ micro battery driver\n");
 	return 0;
+
+ac_err:
+	power_supply_unregister(micro_ac_power);
+batt_err:
+	cancel_delayed_work_sync(&mb->update);
+	destroy_workqueue(mb->wq);
+	return ret;
 }
 
 static int micro_batt_remove(struct platform_device *pdev)
@@ -248,9 +273,10 @@
 {
 	struct micro_battery *mb = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&micro_ac_power);
-	power_supply_unregister(&micro_batt_power);
+	power_supply_unregister(micro_ac_power);
+	power_supply_unregister(micro_batt_power);
 	cancel_delayed_work_sync(&mb->update);
+	destroy_workqueue(mb->wq);
 
 	return 0;
 }
diff --git a/drivers/power/isp1704_charger.c b/drivers/power/isp1704_charger.c
index 0b4cf9d..f2a7d97 100644
--- a/drivers/power/isp1704_charger.c
+++ b/drivers/power/isp1704_charger.c
@@ -57,11 +57,12 @@
 };
 
 struct isp1704_charger {
-	struct device		*dev;
-	struct power_supply	psy;
-	struct usb_phy		*phy;
-	struct notifier_block	nb;
-	struct work_struct	work;
+	struct device			*dev;
+	struct power_supply		*psy;
+	struct power_supply_desc	psy_desc;
+	struct usb_phy			*phy;
+	struct notifier_block		nb;
+	struct work_struct		work;
 
 	/* properties */
 	char			model[8];
@@ -259,10 +260,10 @@
 
 			/* detect wall charger */
 			if (isp1704_charger_detect_dcp(isp)) {
-				isp->psy.type = POWER_SUPPLY_TYPE_USB_DCP;
+				isp->psy_desc.type = POWER_SUPPLY_TYPE_USB_DCP;
 				isp->current_max = 1800;
 			} else {
-				isp->psy.type = POWER_SUPPLY_TYPE_USB;
+				isp->psy_desc.type = POWER_SUPPLY_TYPE_USB;
 				isp->current_max = 500;
 			}
 
@@ -271,7 +272,7 @@
 				usb_gadget_connect(isp->phy->otg->gadget);
 		}
 
-		if (isp->psy.type != POWER_SUPPLY_TYPE_USB_DCP) {
+		if (isp->psy_desc.type != POWER_SUPPLY_TYPE_USB_DCP) {
 			/*
 			 * Only 500mA here or high speed chirp
 			 * handshaking may break
@@ -280,14 +281,14 @@
 				isp->current_max = 500;
 
 			if (isp->current_max > 100)
-				isp->psy.type = POWER_SUPPLY_TYPE_USB_CDP;
+				isp->psy_desc.type = POWER_SUPPLY_TYPE_USB_CDP;
 		}
 		break;
 	case USB_EVENT_NONE:
 		isp->online = false;
 		isp->present = 0;
 		isp->current_max = 0;
-		isp->psy.type = POWER_SUPPLY_TYPE_USB;
+		isp->psy_desc.type = POWER_SUPPLY_TYPE_USB;
 
 		/*
 		 * Disable data pullups. We need to prevent the controller from
@@ -306,7 +307,7 @@
 		goto out;
 	}
 
-	power_supply_changed(&isp->psy);
+	power_supply_changed(isp->psy);
 out:
 	mutex_unlock(&lock);
 }
@@ -326,8 +327,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct isp1704_charger *isp =
-		container_of(psy, struct isp1704_charger, psy);
+	struct isp1704_charger *isp = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_PRESENT:
@@ -403,6 +403,7 @@
 {
 	struct isp1704_charger	*isp;
 	int			ret = -ENODEV;
+	struct power_supply_config psy_cfg = {};
 
 	struct isp1704_charger_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *np = pdev->dev.of_node;
@@ -454,15 +455,19 @@
 	if (ret < 0)
 		goto fail1;
 
-	isp->psy.name		= "isp1704";
-	isp->psy.type		= POWER_SUPPLY_TYPE_USB;
-	isp->psy.properties	= power_props;
-	isp->psy.num_properties	= ARRAY_SIZE(power_props);
-	isp->psy.get_property	= isp1704_charger_get_property;
+	isp->psy_desc.name		= "isp1704";
+	isp->psy_desc.type		= POWER_SUPPLY_TYPE_USB;
+	isp->psy_desc.properties	= power_props;
+	isp->psy_desc.num_properties	= ARRAY_SIZE(power_props);
+	isp->psy_desc.get_property	= isp1704_charger_get_property;
 
-	ret = power_supply_register(isp->dev, &isp->psy);
-	if (ret)
+	psy_cfg.drv_data		= isp;
+
+	isp->psy = power_supply_register(isp->dev, &isp->psy_desc, &psy_cfg);
+	if (IS_ERR(isp->psy)) {
+		ret = PTR_ERR(isp->psy);
 		goto fail1;
+	}
 
 	/*
 	 * REVISIT: using work in order to allow the usb notifications to be
@@ -498,7 +503,7 @@
 
 	return 0;
 fail2:
-	power_supply_unregister(&isp->psy);
+	power_supply_unregister(isp->psy);
 fail1:
 	isp1704_charger_set_power(isp, 0);
 fail0:
@@ -512,7 +517,7 @@
 	struct isp1704_charger *isp = platform_get_drvdata(pdev);
 
 	usb_unregister_notifier(isp->phy, &isp->nb);
-	power_supply_unregister(&isp->psy);
+	power_supply_unregister(isp->psy);
 	isp1704_charger_set_power(isp, 0);
 
 	return 0;
diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c
index 9cd391d..abdfc21 100644
--- a/drivers/power/jz4740-battery.c
+++ b/drivers/power/jz4740-battery.c
@@ -46,7 +46,8 @@
 
 	struct completion read_completion;
 
-	struct power_supply battery;
+	struct power_supply *battery;
+	struct power_supply_desc battery_desc;
 	struct delayed_work work;
 
 	struct mutex lock;
@@ -54,7 +55,7 @@
 
 static inline struct jz_battery *psy_to_jz_battery(struct power_supply *psy)
 {
-	return container_of(psy, struct jz_battery, battery);
+	return power_supply_get_drvdata(psy);
 }
 
 static irqreturn_t jz_battery_irq_handler(int irq, void *devid)
@@ -213,7 +214,7 @@
 	}
 
 	if (has_changed)
-		power_supply_changed(&jz_battery->battery);
+		power_supply_changed(jz_battery->battery);
 }
 
 static enum power_supply_property jz_battery_properties[] = {
@@ -242,8 +243,9 @@
 {
 	int ret = 0;
 	struct jz_battery_platform_data *pdata = pdev->dev.parent->platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct jz_battery *jz_battery;
-	struct power_supply *battery;
+	struct power_supply_desc *battery_desc;
 	struct resource *mem;
 
 	if (!pdata) {
@@ -271,14 +273,17 @@
 	if (IS_ERR(jz_battery->base))
 		return PTR_ERR(jz_battery->base);
 
-	battery = &jz_battery->battery;
-	battery->name = pdata->info.name;
-	battery->type = POWER_SUPPLY_TYPE_BATTERY;
-	battery->properties	= jz_battery_properties;
-	battery->num_properties	= ARRAY_SIZE(jz_battery_properties);
-	battery->get_property = jz_battery_get_property;
-	battery->external_power_changed = jz_battery_external_power_changed;
-	battery->use_for_apm = 1;
+	battery_desc = &jz_battery->battery_desc;
+	battery_desc->name = pdata->info.name;
+	battery_desc->type = POWER_SUPPLY_TYPE_BATTERY;
+	battery_desc->properties	= jz_battery_properties;
+	battery_desc->num_properties	= ARRAY_SIZE(jz_battery_properties);
+	battery_desc->get_property	= jz_battery_get_property;
+	battery_desc->external_power_changed =
+					jz_battery_external_power_changed;
+	battery_desc->use_for_apm	= 1;
+
+	psy_cfg.drv_data = jz_battery;
 
 	jz_battery->pdata = pdata;
 	jz_battery->pdev = pdev;
@@ -330,9 +335,11 @@
 	else
 		jz4740_adc_set_config(pdev->dev.parent, JZ_ADC_CONFIG_BAT_MB, 0);
 
-	ret = power_supply_register(&pdev->dev, &jz_battery->battery);
-	if (ret) {
+	jz_battery->battery = power_supply_register(&pdev->dev, battery_desc,
+							&psy_cfg);
+	if (IS_ERR(jz_battery->battery)) {
 		dev_err(&pdev->dev, "power supply battery register failed.\n");
+		ret = PTR_ERR(jz_battery->battery);
 		goto err_free_charge_irq;
 	}
 
@@ -364,7 +371,7 @@
 		gpio_free(jz_battery->pdata->gpio_charge);
 	}
 
-	power_supply_unregister(&jz_battery->battery);
+	power_supply_unregister(jz_battery->battery);
 
 	free_irq(jz_battery->irq, jz_battery);
 
diff --git a/drivers/power/lp8727_charger.c b/drivers/power/lp8727_charger.c
index 32de636..7e741f1 100644
--- a/drivers/power/lp8727_charger.c
+++ b/drivers/power/lp8727_charger.c
@@ -80,9 +80,9 @@
 };
 
 struct lp8727_psy {
-	struct power_supply ac;
-	struct power_supply usb;
-	struct power_supply batt;
+	struct power_supply *ac;
+	struct power_supply *usb;
+	struct power_supply *batt;
 };
 
 struct lp8727_chg {
@@ -242,9 +242,9 @@
 	lp8727_id_detection(pchg, idno, vbus);
 	lp8727_enable_chgdet(pchg);
 
-	power_supply_changed(&pchg->psy->ac);
-	power_supply_changed(&pchg->psy->usb);
-	power_supply_changed(&pchg->psy->batt);
+	power_supply_changed(pchg->psy->ac);
+	power_supply_changed(pchg->psy->usb);
+	power_supply_changed(pchg->psy->batt);
 }
 
 static irqreturn_t lp8727_isr_func(int irq, void *ptr)
@@ -311,12 +311,12 @@
 				       enum power_supply_property psp,
 				       union power_supply_propval *val)
 {
-	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev->parent);
+	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev.parent);
 
 	if (psp != POWER_SUPPLY_PROP_ONLINE)
 		return -EINVAL;
 
-	val->intval = lp8727_is_charger_attached(psy->name, pchg->devid);
+	val->intval = lp8727_is_charger_attached(psy->desc->name, pchg->devid);
 
 	return 0;
 }
@@ -337,14 +337,14 @@
 				       enum power_supply_property psp,
 				       union power_supply_propval *val)
 {
-	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev->parent);
+	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev.parent);
 	struct lp8727_platform_data *pdata = pchg->pdata;
 	enum lp8727_die_temp temp;
 	u8 read;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
-		if (!lp8727_is_charger_attached(psy->name, pchg->devid)) {
+		if (!lp8727_is_charger_attached(psy->desc->name, pchg->devid)) {
 			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 			return 0;
 		}
@@ -400,13 +400,13 @@
 
 static void lp8727_charger_changed(struct power_supply *psy)
 {
-	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev->parent);
+	struct lp8727_chg *pchg = dev_get_drvdata(psy->dev.parent);
 	u8 eoc_level;
 	u8 ichg;
 	u8 val;
 
 	/* skip if no charger exists */
-	if (!lp8727_is_charger_attached(psy->name, pchg->devid))
+	if (!lp8727_is_charger_attached(psy->desc->name, pchg->devid))
 		return;
 
 	/* update charging parameters */
@@ -418,8 +418,34 @@
 	}
 }
 
+static const struct power_supply_desc lp8727_ac_desc = {
+	.name			= "ac",
+	.type			= POWER_SUPPLY_TYPE_MAINS,
+	.properties		= lp8727_charger_prop,
+	.num_properties		= ARRAY_SIZE(lp8727_charger_prop),
+	.get_property		= lp8727_charger_get_property,
+};
+
+static const struct power_supply_desc lp8727_usb_desc = {
+	.name			= "usb",
+	.type			= POWER_SUPPLY_TYPE_USB,
+	.properties		= lp8727_charger_prop,
+	.num_properties		= ARRAY_SIZE(lp8727_charger_prop),
+	.get_property		= lp8727_charger_get_property,
+};
+
+static const struct power_supply_desc lp8727_batt_desc = {
+	.name			= "main_batt",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= lp8727_battery_prop,
+	.num_properties		= ARRAY_SIZE(lp8727_battery_prop),
+	.get_property		= lp8727_battery_get_property,
+	.external_power_changed	= lp8727_charger_changed,
+};
+
 static int lp8727_register_psy(struct lp8727_chg *pchg)
 {
+	struct power_supply_config psy_cfg = {}; /* Only for ac and usb */
 	struct lp8727_psy *psy;
 
 	psy = devm_kzalloc(pchg->dev, sizeof(*psy), GFP_KERNEL);
@@ -428,44 +454,28 @@
 
 	pchg->psy = psy;
 
-	psy->ac.name = "ac";
-	psy->ac.type = POWER_SUPPLY_TYPE_MAINS;
-	psy->ac.properties = lp8727_charger_prop;
-	psy->ac.num_properties = ARRAY_SIZE(lp8727_charger_prop);
-	psy->ac.get_property = lp8727_charger_get_property;
-	psy->ac.supplied_to = battery_supplied_to;
-	psy->ac.num_supplicants = ARRAY_SIZE(battery_supplied_to);
+	psy_cfg.supplied_to = battery_supplied_to;
+	psy_cfg.num_supplicants = ARRAY_SIZE(battery_supplied_to);
 
-	if (power_supply_register(pchg->dev, &psy->ac))
+	psy->ac = power_supply_register(pchg->dev, &lp8727_ac_desc, &psy_cfg);
+	if (IS_ERR(psy->ac))
 		goto err_psy_ac;
 
-	psy->usb.name = "usb";
-	psy->usb.type = POWER_SUPPLY_TYPE_USB;
-	psy->usb.properties = lp8727_charger_prop;
-	psy->usb.num_properties = ARRAY_SIZE(lp8727_charger_prop);
-	psy->usb.get_property = lp8727_charger_get_property;
-	psy->usb.supplied_to = battery_supplied_to;
-	psy->usb.num_supplicants = ARRAY_SIZE(battery_supplied_to);
-
-	if (power_supply_register(pchg->dev, &psy->usb))
+	psy->usb = power_supply_register(pchg->dev, &lp8727_usb_desc,
+					 &psy_cfg);
+	if (IS_ERR(psy->usb))
 		goto err_psy_usb;
 
-	psy->batt.name = "main_batt";
-	psy->batt.type = POWER_SUPPLY_TYPE_BATTERY;
-	psy->batt.properties = lp8727_battery_prop;
-	psy->batt.num_properties = ARRAY_SIZE(lp8727_battery_prop);
-	psy->batt.get_property = lp8727_battery_get_property;
-	psy->batt.external_power_changed = lp8727_charger_changed;
-
-	if (power_supply_register(pchg->dev, &psy->batt))
+	psy->batt = power_supply_register(pchg->dev, &lp8727_batt_desc, NULL);
+	if (IS_ERR(psy->batt))
 		goto err_psy_batt;
 
 	return 0;
 
 err_psy_batt:
-	power_supply_unregister(&psy->usb);
+	power_supply_unregister(psy->usb);
 err_psy_usb:
-	power_supply_unregister(&psy->ac);
+	power_supply_unregister(psy->ac);
 err_psy_ac:
 	return -EPERM;
 }
@@ -477,9 +487,9 @@
 	if (!psy)
 		return;
 
-	power_supply_unregister(&psy->ac);
-	power_supply_unregister(&psy->usb);
-	power_supply_unregister(&psy->batt);
+	power_supply_unregister(psy->ac);
+	power_supply_unregister(psy->usb);
+	power_supply_unregister(psy->batt);
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c
index 21fc233..f5a48fd 100644
--- a/drivers/power/lp8788-charger.c
+++ b/drivers/power/lp8788-charger.c
@@ -105,8 +105,8 @@
  */
 struct lp8788_charger {
 	struct lp8788 *lp;
-	struct power_supply charger;
-	struct power_supply battery;
+	struct power_supply *charger;
+	struct power_supply *battery;
 	struct work_struct charger_work;
 	struct iio_channel *chan[LP8788_NUM_CHG_ADC];
 	struct lp8788_chg_irq irqs[LP8788_MAX_CHG_IRQS];
@@ -148,7 +148,7 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct lp8788_charger *pchg = dev_get_drvdata(psy->dev->parent);
+	struct lp8788_charger *pchg = dev_get_drvdata(psy->dev.parent);
 	u8 read;
 
 	switch (psp) {
@@ -337,7 +337,7 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct lp8788_charger *pchg = dev_get_drvdata(psy->dev->parent);
+	struct lp8788_charger *pchg = dev_get_drvdata(psy->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -397,36 +397,50 @@
 	return 0;
 }
 
+static const struct power_supply_desc lp8788_psy_charger_desc = {
+	.name		= LP8788_CHARGER_NAME,
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= lp8788_charger_prop,
+	.num_properties	= ARRAY_SIZE(lp8788_charger_prop),
+	.get_property	= lp8788_charger_get_property,
+};
+
+static const struct power_supply_desc lp8788_psy_battery_desc = {
+	.name		= LP8788_BATTERY_NAME,
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= lp8788_battery_prop,
+	.num_properties	= ARRAY_SIZE(lp8788_battery_prop),
+	.get_property	= lp8788_battery_get_property,
+};
+
 static int lp8788_psy_register(struct platform_device *pdev,
 				struct lp8788_charger *pchg)
 {
-	pchg->charger.name = LP8788_CHARGER_NAME;
-	pchg->charger.type = POWER_SUPPLY_TYPE_MAINS;
-	pchg->charger.properties = lp8788_charger_prop;
-	pchg->charger.num_properties = ARRAY_SIZE(lp8788_charger_prop);
-	pchg->charger.get_property = lp8788_charger_get_property;
-	pchg->charger.supplied_to = battery_supplied_to;
-	pchg->charger.num_supplicants = ARRAY_SIZE(battery_supplied_to);
+	struct power_supply_config charger_cfg = {};
 
-	if (power_supply_register(&pdev->dev, &pchg->charger))
+	charger_cfg.supplied_to = battery_supplied_to;
+	charger_cfg.num_supplicants = ARRAY_SIZE(battery_supplied_to);
+
+	pchg->charger = power_supply_register(&pdev->dev,
+					      &lp8788_psy_charger_desc,
+					      &charger_cfg);
+	if (IS_ERR(pchg->charger))
 		return -EPERM;
 
-	pchg->battery.name = LP8788_BATTERY_NAME;
-	pchg->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	pchg->battery.properties = lp8788_battery_prop;
-	pchg->battery.num_properties = ARRAY_SIZE(lp8788_battery_prop);
-	pchg->battery.get_property = lp8788_battery_get_property;
-
-	if (power_supply_register(&pdev->dev, &pchg->battery))
+	pchg->battery = power_supply_register(&pdev->dev,
+					      &lp8788_psy_battery_desc, NULL);
+	if (IS_ERR(pchg->battery)) {
+		power_supply_unregister(pchg->charger);
 		return -EPERM;
+	}
 
 	return 0;
 }
 
 static void lp8788_psy_unregister(struct lp8788_charger *pchg)
 {
-	power_supply_unregister(&pchg->battery);
-	power_supply_unregister(&pchg->charger);
+	power_supply_unregister(pchg->battery);
+	power_supply_unregister(pchg->charger);
 }
 
 static void lp8788_charger_event(struct work_struct *work)
@@ -470,8 +484,8 @@
 	case LP8788_INT_EOC:
 	case LP8788_INT_BATT_LOW:
 	case LP8788_INT_NO_BATT:
-		power_supply_changed(&pchg->charger);
-		power_supply_changed(&pchg->battery);
+		power_supply_changed(pchg->charger);
+		power_supply_changed(pchg->battery);
 		break;
 	default:
 		break;
diff --git a/drivers/power/ltc2941-battery-gauge.c b/drivers/power/ltc2941-battery-gauge.c
index e31c927..daeb086 100644
--- a/drivers/power/ltc2941-battery-gauge.c
+++ b/drivers/power/ltc2941-battery-gauge.c
@@ -59,7 +59,8 @@
 
 struct ltc294x_info {
 	struct i2c_client *client;	/* I2C Client pointer */
-	struct power_supply supply;	/* Supply pointer */
+	struct power_supply *supply;	/* Supply pointer */
+	struct power_supply_desc supply_desc;	/* Supply description */
 	struct delayed_work work;	/* Work scheduler */
 	int num_regs;	/* Number of registers (chip type) */
 	int id;		/* Identifier of ltc294x chip */
@@ -294,8 +295,7 @@
 				enum power_supply_property prop,
 				union power_supply_propval *val)
 {
-	struct ltc294x_info *info =
-		container_of(psy, struct ltc294x_info, supply);
+	struct ltc294x_info *info = power_supply_get_drvdata(psy);
 
 	switch (prop) {
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
@@ -317,8 +317,7 @@
 	enum power_supply_property psp,
 	const union power_supply_propval *val)
 {
-	struct ltc294x_info *info =
-		container_of(psy, struct ltc294x_info, supply);
+	struct ltc294x_info *info = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
@@ -345,7 +344,7 @@
 
 	if (charge != info->charge) {
 		info->charge = charge;
-		power_supply_changed(&info->supply);
+		power_supply_changed(info->supply);
 	}
 }
 
@@ -371,8 +370,8 @@
 	struct ltc294x_info *info = i2c_get_clientdata(client);
 
 	cancel_delayed_work(&info->work);
-	power_supply_unregister(&info->supply);
-	kfree(info->supply.name);
+	power_supply_unregister(info->supply);
+	kfree(info->supply_desc.name);
 	mutex_lock(&ltc294x_lock);
 	idr_remove(&ltc294x_id, info->id);
 	mutex_unlock(&ltc294x_lock);
@@ -382,6 +381,7 @@
 static int ltc294x_i2c_probe(struct i2c_client *client,
 	const struct i2c_device_id *id)
 {
+	struct power_supply_config psy_cfg = {};
 	struct ltc294x_info *info;
 	int ret;
 	int num;
@@ -406,8 +406,9 @@
 	i2c_set_clientdata(client, info);
 
 	info->num_regs = id->driver_data;
-	info->supply.name = kasprintf(GFP_KERNEL, "%s-%d", client->name, num);
-	if (!info->supply.name) {
+	info->supply_desc.name = kasprintf(GFP_KERNEL, "%s-%d", client->name,
+					   num);
+	if (!info->supply_desc.name) {
 		ret = -ENOMEM;
 		goto fail_name;
 	}
@@ -440,30 +441,32 @@
 	} else {
 		if (prescaler_exp > LTC2941_MAX_PRESCALER_EXP)
 			prescaler_exp = LTC2941_MAX_PRESCALER_EXP;
-		info->Qlsb = ((58 * 50000) / r_sense) /
+		info->Qlsb = ((85 * 50000) / r_sense) /
 				(128 / (1 << prescaler_exp));
 	}
 
 	info->client = client;
 	info->id = num;
-	info->supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	info->supply.properties = ltc294x_properties;
+	info->supply_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	info->supply_desc.properties = ltc294x_properties;
 	if (info->num_regs >= LTC294X_REG_TEMPERATURE_LSB)
-		info->supply.num_properties =
+		info->supply_desc.num_properties =
 			ARRAY_SIZE(ltc294x_properties);
 	else if (info->num_regs >= LTC294X_REG_CURRENT_LSB)
-		info->supply.num_properties =
+		info->supply_desc.num_properties =
 			ARRAY_SIZE(ltc294x_properties) - 1;
 	else if (info->num_regs >= LTC294X_REG_VOLTAGE_LSB)
-		info->supply.num_properties =
+		info->supply_desc.num_properties =
 			ARRAY_SIZE(ltc294x_properties) - 2;
 	else
-		info->supply.num_properties =
+		info->supply_desc.num_properties =
 			ARRAY_SIZE(ltc294x_properties) - 3;
-	info->supply.get_property = ltc294x_get_property;
-	info->supply.set_property = ltc294x_set_property;
-	info->supply.property_is_writeable = ltc294x_property_is_writeable;
-	info->supply.external_power_changed	= NULL;
+	info->supply_desc.get_property = ltc294x_get_property;
+	info->supply_desc.set_property = ltc294x_set_property;
+	info->supply_desc.property_is_writeable = ltc294x_property_is_writeable;
+	info->supply_desc.external_power_changed	= NULL;
+
+	psy_cfg.drv_data = info;
 
 	INIT_DELAYED_WORK(&info->work, ltc294x_work);
 
@@ -473,9 +476,11 @@
 		goto fail_comm;
 	}
 
-	ret = power_supply_register(&client->dev, &info->supply);
-	if (ret) {
+	info->supply = power_supply_register(&client->dev, &info->supply_desc,
+					     &psy_cfg);
+	if (IS_ERR(info->supply)) {
 		dev_err(&client->dev, "failed to register ltc2941\n");
+		ret = PTR_ERR(info->supply);
 		goto fail_register;
 	} else {
 		schedule_delayed_work(&info->work, LTC294X_WORK_DELAY * HZ);
@@ -484,7 +489,7 @@
 	return 0;
 
 fail_register:
-	kfree(info->supply.name);
+	kfree(info->supply_desc.name);
 fail_comm:
 fail_name:
 fail_info:
diff --git a/drivers/power/max14577_charger.c b/drivers/power/max14577_charger.c
index ef4103e..a36bcaf 100644
--- a/drivers/power/max14577_charger.c
+++ b/drivers/power/max14577_charger.c
@@ -22,12 +22,9 @@
 #include <linux/mfd/max14577.h>
 
 struct max14577_charger {
-	struct device *dev;
-	struct max14577	*max14577;
-	struct power_supply	charger;
-
-	unsigned int		charging_state;
-	unsigned int		battery_state;
+	struct device		*dev;
+	struct max14577		*max14577;
+	struct power_supply	*charger;
 
 	struct max14577_charger_platform_data	*pdata;
 };
@@ -57,10 +54,10 @@
 	}
 }
 
-static int max14577_get_charger_state(struct max14577_charger *chg)
+static int max14577_get_charger_state(struct max14577_charger *chg, int *val)
 {
 	struct regmap *rmap = chg->max14577->regmap;
-	int state = POWER_SUPPLY_STATUS_DISCHARGING;
+	int ret;
 	u8 reg_data;
 
 	/*
@@ -74,23 +71,32 @@
 	 *  - handle properly dead-battery charging (respect timer)
 	 *  - handle timers (fast-charge and prequal) /MBCCHGERR/
 	 */
-	max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL2, &reg_data);
-	if ((reg_data & CHGCTRL2_MBCHOSTEN_MASK) == 0)
-		goto state_set;
+	ret = max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL2, &reg_data);
+	if (ret < 0)
+		goto out;
 
-	max14577_read_reg(rmap, MAX14577_CHG_REG_STATUS3, &reg_data);
+	if ((reg_data & CHGCTRL2_MBCHOSTEN_MASK) == 0) {
+		*val = POWER_SUPPLY_STATUS_DISCHARGING;
+		goto out;
+	}
+
+	ret = max14577_read_reg(rmap, MAX14577_CHG_REG_STATUS3, &reg_data);
+	if (ret < 0)
+		goto out;
+
 	if (reg_data & STATUS3_CGMBC_MASK) {
 		/* Charger or USB-cable is connected */
 		if (reg_data & STATUS3_EOC_MASK)
-			state = POWER_SUPPLY_STATUS_FULL;
+			*val = POWER_SUPPLY_STATUS_FULL;
 		else
-			state = POWER_SUPPLY_STATUS_CHARGING;
-		goto state_set;
+			*val = POWER_SUPPLY_STATUS_CHARGING;
+		goto out;
 	}
 
-state_set:
-	chg->charging_state = state;
-	return state;
+	*val = POWER_SUPPLY_STATUS_DISCHARGING;
+
+out:
+	return ret;
 }
 
 /*
@@ -98,8 +104,10 @@
  *  - POWER_SUPPLY_CHARGE_TYPE_NONE
  *  - POWER_SUPPLY_CHARGE_TYPE_FAST
  */
-static int max14577_get_charge_type(struct max14577_charger *chg)
+static int max14577_get_charge_type(struct max14577_charger *chg, int *val)
 {
+	int ret, charging;
+
 	/*
 	 * TODO: CHARGE_TYPE_TRICKLE (VCHGR_RC or EOC)?
 	 * As spec says:
@@ -108,18 +116,29 @@
 	 *  top-off timer starts. The device continues to trickle
 	 *  charge the battery until the top-off timer runs out."
 	 */
-	if (max14577_get_charger_state(chg) == POWER_SUPPLY_STATUS_CHARGING)
-		return POWER_SUPPLY_CHARGE_TYPE_FAST;
-	return POWER_SUPPLY_CHARGE_TYPE_NONE;
+	ret = max14577_get_charger_state(chg, &charging);
+	if (ret < 0)
+		return ret;
+
+	if (charging == POWER_SUPPLY_STATUS_CHARGING)
+		*val = POWER_SUPPLY_CHARGE_TYPE_FAST;
+	else
+		*val = POWER_SUPPLY_CHARGE_TYPE_NONE;
+
+	return 0;
 }
 
-static int max14577_get_online(struct max14577_charger *chg)
+static int max14577_get_online(struct max14577_charger *chg, int *val)
 {
 	struct regmap *rmap = chg->max14577->regmap;
 	u8 reg_data;
+	int ret;
 	enum max14577_muic_charger_type chg_type;
 
-	max14577_read_reg(rmap, MAX14577_MUIC_REG_STATUS2, &reg_data);
+	ret = max14577_read_reg(rmap, MAX14577_MUIC_REG_STATUS2, &reg_data);
+	if (ret < 0)
+		return ret;
+
 	reg_data = ((reg_data & STATUS2_CHGTYP_MASK) >> STATUS2_CHGTYP_SHIFT);
 	chg_type = maxim_get_charger_type(chg->max14577->dev_type, reg_data);
 	switch (chg_type) {
@@ -129,14 +148,17 @@
 	case MAX14577_CHARGER_TYPE_SPECIAL_1A:
 	case MAX14577_CHARGER_TYPE_DEAD_BATTERY:
 	case MAX77836_CHARGER_TYPE_SPECIAL_BIAS:
-		return 1;
+		*val = 1;
+		break;
 	case MAX14577_CHARGER_TYPE_NONE:
 	case MAX14577_CHARGER_TYPE_DOWNSTREAM_PORT:
 	case MAX14577_CHARGER_TYPE_RESERVED:
 	case MAX77836_CHARGER_TYPE_RESERVED:
 	default:
-		return 0;
+		*val = 0;
 	}
+
+	return 0;
 }
 
 /*
@@ -145,30 +167,38 @@
  *  - POWER_SUPPLY_HEALTH_OVERVOLTAGE
  *  - POWER_SUPPLY_HEALTH_GOOD
  */
-static int max14577_get_battery_health(struct max14577_charger *chg)
+static int max14577_get_battery_health(struct max14577_charger *chg, int *val)
 {
 	struct regmap *rmap = chg->max14577->regmap;
-	int state = POWER_SUPPLY_HEALTH_GOOD;
+	int ret;
 	u8 reg_data;
 	enum max14577_muic_charger_type chg_type;
 
-	max14577_read_reg(rmap, MAX14577_MUIC_REG_STATUS2, &reg_data);
+	ret = max14577_read_reg(rmap, MAX14577_MUIC_REG_STATUS2, &reg_data);
+	if (ret < 0)
+		goto out;
+
 	reg_data = ((reg_data & STATUS2_CHGTYP_MASK) >> STATUS2_CHGTYP_SHIFT);
 	chg_type = maxim_get_charger_type(chg->max14577->dev_type, reg_data);
 	if (chg_type == MAX14577_CHARGER_TYPE_DEAD_BATTERY) {
-		state = POWER_SUPPLY_HEALTH_DEAD;
-		goto state_set;
+		*val = POWER_SUPPLY_HEALTH_DEAD;
+		goto out;
 	}
 
-	max14577_read_reg(rmap, MAX14577_CHG_REG_STATUS3, &reg_data);
+	ret = max14577_read_reg(rmap, MAX14577_CHG_REG_STATUS3, &reg_data);
+	if (ret < 0)
+		goto out;
+
 	if (reg_data & STATUS3_OVP_MASK) {
-		state = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
-		goto state_set;
+		*val = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+		goto out;
 	}
 
-state_set:
-	chg->battery_state = state;
-	return state;
+	/* Not dead, not overvoltage */
+	*val = POWER_SUPPLY_HEALTH_GOOD;
+
+out:
+	return ret;
 }
 
 /*
@@ -176,9 +206,11 @@
  * The max14577 chip doesn't report any status of battery presence.
  * Lets assume that it will always be used with some battery.
  */
-static int max14577_get_present(struct max14577_charger *chg)
+static int max14577_get_present(struct max14577_charger *chg, int *val)
 {
-	return 1;
+	*val = 1;
+
+	return 0;
 }
 
 static int max14577_set_fast_charge_timer(struct max14577_charger *chg,
@@ -389,26 +421,24 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct max14577_charger *chg = container_of(psy,
-						  struct max14577_charger,
-						  charger);
+	struct max14577_charger *chg = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
-		val->intval = max14577_get_charger_state(chg);
+		ret = max14577_get_charger_state(chg, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
-		val->intval = max14577_get_charge_type(chg);
+		ret = max14577_get_charge_type(chg, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_HEALTH:
-		val->intval = max14577_get_battery_health(chg);
+		ret = max14577_get_battery_health(chg, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_PRESENT:
-		val->intval = max14577_get_present(chg);
+		ret = max14577_get_present(chg, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_ONLINE:
-		val->intval = max14577_get_online(chg);
+		ret = max14577_get_online(chg, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_MODEL_NAME:
 		BUILD_BUG_ON(ARRAY_SIZE(model_names) != MAXIM_DEVICE_TYPE_NUM);
@@ -424,6 +454,14 @@
 	return ret;
 }
 
+static const struct power_supply_desc max14577_charger_desc = {
+	.name = "max14577-charger",
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = max14577_charger_props,
+	.num_properties = ARRAY_SIZE(max14577_charger_props),
+	.get_property = max14577_charger_get_property,
+};
+
 #ifdef CONFIG_OF
 static struct max14577_charger_platform_data *max14577_charger_dt_init(
 		struct platform_device *pdev)
@@ -531,6 +569,7 @@
 static int max14577_charger_probe(struct platform_device *pdev)
 {
 	struct max14577_charger *chg;
+	struct power_supply_config psy_cfg = {};
 	struct max14577 *max14577 = dev_get_drvdata(pdev->dev.parent);
 	int ret;
 
@@ -550,21 +589,18 @@
 	if (ret)
 		return ret;
 
-	chg->charger.name = "max14577-charger",
-	chg->charger.type = POWER_SUPPLY_TYPE_BATTERY,
-	chg->charger.properties = max14577_charger_props,
-	chg->charger.num_properties = ARRAY_SIZE(max14577_charger_props),
-	chg->charger.get_property = max14577_charger_get_property,
-
 	ret = device_create_file(&pdev->dev, &dev_attr_fast_charge_timer);
 	if (ret) {
 		dev_err(&pdev->dev, "failed: create sysfs entry\n");
 		return ret;
 	}
 
-	ret = power_supply_register(&pdev->dev, &chg->charger);
-	if (ret) {
+	psy_cfg.drv_data = chg;
+	chg->charger = power_supply_register(&pdev->dev, &max14577_charger_desc,
+						&psy_cfg);
+	if (IS_ERR(chg->charger)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
+		ret = PTR_ERR(chg->charger);
 		goto err;
 	}
 
@@ -585,7 +621,7 @@
 	struct max14577_charger *chg = platform_get_drvdata(pdev);
 
 	device_remove_file(&pdev->dev, &dev_attr_fast_charge_timer);
-	power_supply_unregister(&chg->charger);
+	power_supply_unregister(chg->charger);
 
 	return 0;
 }
diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c
index 14d4470..8689c80 100644
--- a/drivers/power/max17040_battery.c
+++ b/drivers/power/max17040_battery.c
@@ -40,7 +40,7 @@
 struct max17040_chip {
 	struct i2c_client		*client;
 	struct delayed_work		work;
-	struct power_supply		battery;
+	struct power_supply		*battery;
 	struct max17040_platform_data	*pdata;
 
 	/* State Of Connect */
@@ -57,8 +57,7 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct max17040_chip *chip = container_of(psy,
-				struct max17040_chip, battery);
+	struct max17040_chip *chip = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -188,7 +187,8 @@
 	max17040_get_online(chip->client);
 	max17040_get_status(chip->client);
 
-	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+	queue_delayed_work(system_power_efficient_wq, &chip->work,
+			   MAX17040_DELAY);
 }
 
 static enum power_supply_property max17040_battery_props[] = {
@@ -198,12 +198,20 @@
 	POWER_SUPPLY_PROP_CAPACITY,
 };
 
+static const struct power_supply_desc max17040_battery_desc = {
+	.name		= "battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= max17040_get_property,
+	.properties	= max17040_battery_props,
+	.num_properties	= ARRAY_SIZE(max17040_battery_props),
+};
+
 static int max17040_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct power_supply_config psy_cfg = {};
 	struct max17040_chip *chip;
-	int ret;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
 		return -EIO;
@@ -216,24 +224,21 @@
 	chip->pdata = client->dev.platform_data;
 
 	i2c_set_clientdata(client, chip);
+	psy_cfg.drv_data = chip;
 
-	chip->battery.name		= "battery";
-	chip->battery.type		= POWER_SUPPLY_TYPE_BATTERY;
-	chip->battery.get_property	= max17040_get_property;
-	chip->battery.properties	= max17040_battery_props;
-	chip->battery.num_properties	= ARRAY_SIZE(max17040_battery_props);
-
-	ret = power_supply_register(&client->dev, &chip->battery);
-	if (ret) {
+	chip->battery = power_supply_register(&client->dev,
+				&max17040_battery_desc, &psy_cfg);
+	if (IS_ERR(chip->battery)) {
 		dev_err(&client->dev, "failed: power supply register\n");
-		return ret;
+		return PTR_ERR(chip->battery);
 	}
 
 	max17040_reset(client);
 	max17040_get_version(client);
 
 	INIT_DEFERRABLE_WORK(&chip->work, max17040_work);
-	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+	queue_delayed_work(system_power_efficient_wq, &chip->work,
+			   MAX17040_DELAY);
 
 	return 0;
 }
@@ -242,7 +247,7 @@
 {
 	struct max17040_chip *chip = i2c_get_clientdata(client);
 
-	power_supply_unregister(&chip->battery);
+	power_supply_unregister(chip->battery);
 	cancel_delayed_work(&chip->work);
 	return 0;
 }
@@ -263,7 +268,8 @@
 	struct i2c_client *client = to_i2c_client(dev);
 	struct max17040_chip *chip = i2c_get_clientdata(client);
 
-	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+	queue_delayed_work(system_power_efficient_wq, &chip->work,
+			   MAX17040_DELAY);
 	return 0;
 }
 
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 1da6c5f..6cc5e87 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -63,13 +63,10 @@
 #define dP_ACC_100	0x1900
 #define dP_ACC_200	0x3200
 
-#define MAX17042_IC_VERSION	0x0092
-#define MAX17047_IC_VERSION	0x00AC	/* same for max17050 */
-
 struct max17042_chip {
 	struct i2c_client *client;
 	struct regmap *regmap;
-	struct power_supply battery;
+	struct power_supply *battery;
 	enum max170xx_chip_type chip_type;
 	struct max17042_platform_data *pdata;
 	struct work_struct work;
@@ -96,8 +93,7 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct max17042_chip *chip = container_of(psy,
-				struct max17042_chip, battery);
+	struct max17042_chip *chip = power_supply_get_drvdata(psy);
 	struct regmap *map = chip->regmap;
 	int ret;
 	u32 data;
@@ -132,7 +128,7 @@
 		val->intval *= 20000; /* Units of LSB = 20mV */
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
-		if (chip->chip_type == MAX17042)
+		if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
 			ret = regmap_read(map, MAX17042_V_empty, &data);
 		else
 			ret = regmap_read(map, MAX17047_V_empty, &data);
@@ -272,6 +268,7 @@
 static inline void max10742_unlock_model(struct max17042_chip *chip)
 {
 	struct regmap *map = chip->regmap;
+
 	regmap_write(map, MAX17042_MLOCKReg1, MODEL_UNLOCK1);
 	regmap_write(map, MAX17042_MLOCKReg2, MODEL_UNLOCK2);
 }
@@ -289,6 +286,7 @@
 {
 	struct regmap *map = chip->regmap;
 	int i;
+
 	for (i = 0; i < size; i++)
 		regmap_write(map, addr + i,
 			chip->pdata->config_data->cell_char_tbl[i]);
@@ -379,7 +377,8 @@
 	regmap_write(map, MAX17042_FilterCFG,
 			config->filter_cfg);
 	regmap_write(map, MAX17042_RelaxCFG, config->relax_cfg);
-	if (chip->chip_type == MAX17047)
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047 ||
+			chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050)
 		regmap_write(map, MAX17047_FullSOCThr,
 						config->full_soc_thresh);
 }
@@ -392,7 +391,7 @@
 	max17042_write_verify_reg(map, MAX17042_RCOMP0, config->rcomp0);
 	max17042_write_verify_reg(map, MAX17042_TempCo,	config->tcompc0);
 	max17042_write_verify_reg(map, MAX17042_ICHGTerm, config->ichgt_term);
-	if (chip->chip_type == MAX17042) {
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) {
 		regmap_write(map, MAX17042_EmptyTempCo,	config->empty_tempco);
 		max17042_write_verify_reg(map, MAX17042_K_empty0,
 					config->kempty0);
@@ -501,14 +500,14 @@
 
 	max17042_override_por(map, MAX17042_FullCAP, config->fullcap);
 	max17042_override_por(map, MAX17042_FullCAPNom, config->fullcapnom);
-	if (chip->chip_type == MAX17042)
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
 		max17042_override_por(map, MAX17042_SOC_empty,
 						config->socempty);
 	max17042_override_por(map, MAX17042_LAvg_empty, config->lavg_empty);
 	max17042_override_por(map, MAX17042_dQacc, config->dqacc);
 	max17042_override_por(map, MAX17042_dPacc, config->dpacc);
 
-	if (chip->chip_type == MAX17042)
+	if (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042)
 		max17042_override_por(map, MAX17042_V_empty, config->vempty);
 	else
 		max17042_override_por(map, MAX17047_V_empty, config->vempty);
@@ -529,7 +528,6 @@
 {
 	struct regmap *map = chip->regmap;
 	int ret;
-	int val;
 
 	max17042_override_por_values(chip);
 	/* After Power up, the MAX17042 requires 500mS in order
@@ -572,8 +570,7 @@
 	max17042_load_new_capacity_params(chip);
 
 	/* Init complete, Clear the POR bit */
-	regmap_read(map, MAX17042_STATUS, &val);
-	regmap_write(map, MAX17042_STATUS, val & (~STATUS_POR_BIT));
+	regmap_update_bits(map, MAX17042_STATUS, STATUS_POR_BIT, 0x0);
 	return 0;
 }
 
@@ -604,7 +601,7 @@
 		max17042_set_soc_threshold(chip, 1);
 	}
 
-	power_supply_changed(&chip->battery);
+	power_supply_changed(chip->battery);
 	return IRQ_HANDLED;
 }
 
@@ -664,10 +661,28 @@
 	.val_format_endian = REGMAP_ENDIAN_NATIVE,
 };
 
+static const struct power_supply_desc max17042_psy_desc = {
+	.name		= "max170xx_battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= max17042_get_property,
+	.properties	= max17042_battery_props,
+	.num_properties	= ARRAY_SIZE(max17042_battery_props),
+};
+
+static const struct power_supply_desc max17042_no_current_sense_psy_desc = {
+	.name		= "max170xx_battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= max17042_get_property,
+	.properties	= max17042_battery_props,
+	.num_properties	= ARRAY_SIZE(max17042_battery_props) - 2,
+};
+
 static int max17042_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	const struct power_supply_desc *max17042_desc = &max17042_psy_desc;
+	struct power_supply_config psy_cfg = {};
 	struct max17042_chip *chip;
 	int ret;
 	int i;
@@ -694,29 +709,13 @@
 	}
 
 	i2c_set_clientdata(client, chip);
-
-	regmap_read(chip->regmap, MAX17042_DevName, &val);
-	if (val == MAX17042_IC_VERSION) {
-		dev_dbg(&client->dev, "chip type max17042 detected\n");
-		chip->chip_type = MAX17042;
-	} else if (val == MAX17047_IC_VERSION) {
-		dev_dbg(&client->dev, "chip type max17047/50 detected\n");
-		chip->chip_type = MAX17047;
-	} else {
-		dev_err(&client->dev, "device version mismatch: %x\n", val);
-		return -EIO;
-	}
-
-	chip->battery.name		= "max170xx_battery";
-	chip->battery.type		= POWER_SUPPLY_TYPE_BATTERY;
-	chip->battery.get_property	= max17042_get_property;
-	chip->battery.properties	= max17042_battery_props;
-	chip->battery.num_properties	= ARRAY_SIZE(max17042_battery_props);
+	chip->chip_type = id->driver_data;
+	psy_cfg.drv_data = chip;
 
 	/* When current is not measured,
 	 * CURRENT_NOW and CURRENT_AVG properties should be invisible. */
 	if (!chip->pdata->enable_current_sense)
-		chip->battery.num_properties -= 2;
+		max17042_desc = &max17042_no_current_sense_psy_desc;
 
 	if (chip->pdata->r_sns == 0)
 		chip->pdata->r_sns = MAX17042_DEFAULT_SNS_RESISTOR;
@@ -733,21 +732,22 @@
 		regmap_write(chip->regmap, MAX17042_LearnCFG, 0x0007);
 	}
 
-	ret = power_supply_register(&client->dev, &chip->battery);
-	if (ret) {
+	chip->battery = power_supply_register(&client->dev, max17042_desc,
+						&psy_cfg);
+	if (IS_ERR(chip->battery)) {
 		dev_err(&client->dev, "failed: power supply register\n");
-		return ret;
+		return PTR_ERR(chip->battery);
 	}
 
 	if (client->irq) {
 		ret = request_threaded_irq(client->irq, NULL,
 					max17042_thread_handler,
 					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-					chip->battery.name, chip);
+					chip->battery->desc->name, chip);
 		if (!ret) {
-			regmap_read(chip->regmap, MAX17042_CONFIG, &val);
-			val |= CONFIG_ALRT_BIT_ENBL;
-			regmap_write(chip->regmap, MAX17042_CONFIG, val);
+			regmap_update_bits(chip->regmap, MAX17042_CONFIG,
+					CONFIG_ALRT_BIT_ENBL,
+					CONFIG_ALRT_BIT_ENBL);
 			max17042_set_soc_threshold(chip, 1);
 		} else {
 			client->irq = 0;
@@ -773,7 +773,7 @@
 
 	if (client->irq)
 		free_irq(client->irq, chip);
-	power_supply_unregister(&chip->battery);
+	power_supply_unregister(chip->battery);
 	return 0;
 }
 
@@ -823,9 +823,9 @@
 #endif
 
 static const struct i2c_device_id max17042_id[] = {
-	{ "max17042", 0 },
-	{ "max17047", 1 },
-	{ "max17050", 2 },
+	{ "max17042", MAXIM_DEVICE_TYPE_MAX17042 },
+	{ "max17047", MAXIM_DEVICE_TYPE_MAX17047 },
+	{ "max17050", MAXIM_DEVICE_TYPE_MAX17050 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max17042_id);
diff --git a/drivers/power/max77693_charger.c b/drivers/power/max77693_charger.c
index b042970..754879e 100644
--- a/drivers/power/max77693_charger.c
+++ b/drivers/power/max77693_charger.c
@@ -22,14 +22,14 @@
 #include <linux/mfd/max77693.h>
 #include <linux/mfd/max77693-private.h>
 
-static const char *max77693_charger_name		= "max77693-charger";
+#define MAX77693_CHARGER_NAME				"max77693-charger"
 static const char *max77693_charger_model		= "MAX77693";
 static const char *max77693_charger_manufacturer	= "Maxim Integrated";
 
 struct max77693_charger {
 	struct device		*dev;
 	struct max77693_dev	*max77693;
-	struct power_supply	charger;
+	struct power_supply	*charger;
 
 	u32 constant_volt;
 	u32 min_system_volt;
@@ -38,13 +38,14 @@
 	u32 charge_input_threshold_volt;
 };
 
-static int max77693_get_charger_state(struct regmap *regmap)
+static int max77693_get_charger_state(struct regmap *regmap, int *val)
 {
-	int state;
+	int ret;
 	unsigned int data;
 
-	if (regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data) < 0)
-		return POWER_SUPPLY_STATUS_UNKNOWN;
+	ret = regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data);
+	if (ret < 0)
+		return ret;
 
 	data &= CHG_DETAILS_01_CHG_MASK;
 	data >>= CHG_DETAILS_01_CHG_SHIFT;
@@ -56,35 +57,36 @@
 	case MAX77693_CHARGING_TOP_OFF:
 	/* In high temp the charging current is reduced, but still charging */
 	case MAX77693_CHARGING_HIGH_TEMP:
-		state = POWER_SUPPLY_STATUS_CHARGING;
+		*val = POWER_SUPPLY_STATUS_CHARGING;
 		break;
 	case MAX77693_CHARGING_DONE:
-		state = POWER_SUPPLY_STATUS_FULL;
+		*val = POWER_SUPPLY_STATUS_FULL;
 		break;
 	case MAX77693_CHARGING_TIMER_EXPIRED:
 	case MAX77693_CHARGING_THERMISTOR_SUSPEND:
-		state = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		*val = POWER_SUPPLY_STATUS_NOT_CHARGING;
 		break;
 	case MAX77693_CHARGING_OFF:
 	case MAX77693_CHARGING_OVER_TEMP:
 	case MAX77693_CHARGING_WATCHDOG_EXPIRED:
-		state = POWER_SUPPLY_STATUS_DISCHARGING;
+		*val = POWER_SUPPLY_STATUS_DISCHARGING;
 		break;
 	case MAX77693_CHARGING_RESERVED:
 	default:
-		state = POWER_SUPPLY_STATUS_UNKNOWN;
+		*val = POWER_SUPPLY_STATUS_UNKNOWN;
 	}
 
-	return state;
+	return 0;
 }
 
-static int max77693_get_charge_type(struct regmap *regmap)
+static int max77693_get_charge_type(struct regmap *regmap, int *val)
 {
-	int state;
+	int ret;
 	unsigned int data;
 
-	if (regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data) < 0)
-		return POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
+	ret = regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data);
+	if (ret < 0)
+		return ret;
 
 	data &= CHG_DETAILS_01_CHG_MASK;
 	data >>= CHG_DETAILS_01_CHG_SHIFT;
@@ -96,13 +98,13 @@
 	 * 100 and 250 mA. It is higher than prequalification current.
 	 */
 	case MAX77693_CHARGING_TOP_OFF:
-		state = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+		*val = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
 		break;
 	case MAX77693_CHARGING_FAST_CONST_CURRENT:
 	case MAX77693_CHARGING_FAST_CONST_VOLTAGE:
 	/* In high temp the charging current is reduced, but still charging */
 	case MAX77693_CHARGING_HIGH_TEMP:
-		state = POWER_SUPPLY_CHARGE_TYPE_FAST;
+		*val = POWER_SUPPLY_CHARGE_TYPE_FAST;
 		break;
 	case MAX77693_CHARGING_DONE:
 	case MAX77693_CHARGING_TIMER_EXPIRED:
@@ -110,14 +112,14 @@
 	case MAX77693_CHARGING_OFF:
 	case MAX77693_CHARGING_OVER_TEMP:
 	case MAX77693_CHARGING_WATCHDOG_EXPIRED:
-		state = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		*val = POWER_SUPPLY_CHARGE_TYPE_NONE;
 		break;
 	case MAX77693_CHARGING_RESERVED:
 	default:
-		state = POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
+		*val = POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
 	}
 
-	return state;
+	return 0;
 }
 
 /*
@@ -129,69 +131,78 @@
  *  - POWER_SUPPLY_HEALTH_UNKNOWN
  *  - POWER_SUPPLY_HEALTH_UNSPEC_FAILURE
  */
-static int max77693_get_battery_health(struct regmap *regmap)
+static int max77693_get_battery_health(struct regmap *regmap, int *val)
 {
-	int state;
+	int ret;
 	unsigned int data;
 
-	if (regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data) < 0)
-		return POWER_SUPPLY_HEALTH_UNKNOWN;
+	ret = regmap_read(regmap, MAX77693_CHG_REG_CHG_DETAILS_01, &data);
+	if (ret < 0)
+		return ret;
 
 	data &= CHG_DETAILS_01_BAT_MASK;
 	data >>= CHG_DETAILS_01_BAT_SHIFT;
 
 	switch (data) {
 	case MAX77693_BATTERY_NOBAT:
-		state = POWER_SUPPLY_HEALTH_DEAD;
+		*val = POWER_SUPPLY_HEALTH_DEAD;
 		break;
 	case MAX77693_BATTERY_PREQUALIFICATION:
 	case MAX77693_BATTERY_GOOD:
 	case MAX77693_BATTERY_LOWVOLTAGE:
-		state = POWER_SUPPLY_HEALTH_GOOD;
+		*val = POWER_SUPPLY_HEALTH_GOOD;
 		break;
 	case MAX77693_BATTERY_TIMER_EXPIRED:
 		/*
 		 * Took longer to charge than expected, charging suspended.
 		 * Damaged battery?
 		 */
-		state = POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE;
+		*val = POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE;
 		break;
 	case MAX77693_BATTERY_OVERVOLTAGE:
-		state = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+		*val = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
 		break;
 	case MAX77693_BATTERY_OVERCURRENT:
-		state = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		*val = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
 		break;
 	case MAX77693_BATTERY_RESERVED:
 	default:
-		state = POWER_SUPPLY_HEALTH_UNKNOWN;
+		*val = POWER_SUPPLY_HEALTH_UNKNOWN;
 		break;
 	}
 
-	return state;
+	return 0;
 }
 
-static int max77693_get_present(struct regmap *regmap)
+static int max77693_get_present(struct regmap *regmap, int *val)
 {
 	unsigned int data;
+	int ret;
 
 	/*
 	 * Read CHG_INT_OK register. High DETBAT bit here should be
 	 * equal to value 0x0 in CHG_DETAILS_01/BAT field.
 	 */
-	regmap_read(regmap, MAX77693_CHG_REG_CHG_INT_OK, &data);
-	if (data & CHG_INT_OK_DETBAT_MASK)
-		return 0;
-	return 1;
+	ret = regmap_read(regmap, MAX77693_CHG_REG_CHG_INT_OK, &data);
+	if (ret < 0)
+		return ret;
+
+	*val = (data & CHG_INT_OK_DETBAT_MASK) ? 0 : 1;
+
+	return 0;
 }
 
-static int max77693_get_online(struct regmap *regmap)
+static int max77693_get_online(struct regmap *regmap, int *val)
 {
 	unsigned int data;
+	int ret;
 
-	regmap_read(regmap, MAX77693_CHG_REG_CHG_INT_OK, &data);
-	if (data & CHG_INT_OK_CHGIN_MASK)
-		return 1;
+	ret = regmap_read(regmap, MAX77693_CHG_REG_CHG_INT_OK, &data);
+	if (ret < 0)
+		return ret;
+
+	*val = (data & CHG_INT_OK_CHGIN_MASK) ? 1 : 0;
+
 	return 0;
 }
 
@@ -209,27 +220,25 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct max77693_charger *chg = container_of(psy,
-						  struct max77693_charger,
-						  charger);
+	struct max77693_charger *chg = power_supply_get_drvdata(psy);
 	struct regmap *regmap = chg->max77693->regmap;
 	int ret = 0;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
-		val->intval = max77693_get_charger_state(regmap);
+		ret = max77693_get_charger_state(regmap, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
-		val->intval = max77693_get_charge_type(regmap);
+		ret = max77693_get_charge_type(regmap, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_HEALTH:
-		val->intval = max77693_get_battery_health(regmap);
+		ret = max77693_get_battery_health(regmap, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_PRESENT:
-		val->intval = max77693_get_present(regmap);
+		ret = max77693_get_present(regmap, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_ONLINE:
-		val->intval = max77693_get_online(regmap);
+		ret = max77693_get_online(regmap, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_MODEL_NAME:
 		val->strval = max77693_charger_model;
@@ -244,6 +253,14 @@
 	return ret;
 }
 
+static const struct power_supply_desc max77693_charger_desc = {
+	.name		= MAX77693_CHARGER_NAME,
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= max77693_charger_props,
+	.num_properties	= ARRAY_SIZE(max77693_charger_props),
+	.get_property	= max77693_charger_get_property,
+};
+
 static ssize_t device_attr_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count,
 		int (*fn)(struct max77693_charger *, unsigned long))
@@ -659,6 +676,7 @@
 static int max77693_charger_probe(struct platform_device *pdev)
 {
 	struct max77693_charger *chg;
+	struct power_supply_config psy_cfg = {};
 	struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent);
 	int ret;
 
@@ -678,11 +696,7 @@
 	if (ret)
 		return ret;
 
-	chg->charger.name = max77693_charger_name;
-	chg->charger.type = POWER_SUPPLY_TYPE_BATTERY;
-	chg->charger.properties = max77693_charger_props;
-	chg->charger.num_properties = ARRAY_SIZE(max77693_charger_props);
-	chg->charger.get_property = max77693_charger_get_property;
+	psy_cfg.drv_data = chg;
 
 	ret = device_create_file(&pdev->dev, &dev_attr_fast_charge_timer);
 	if (ret) {
@@ -703,9 +717,12 @@
 		goto err;
 	}
 
-	ret = power_supply_register(&pdev->dev, &chg->charger);
-	if (ret) {
+	chg->charger = power_supply_register(&pdev->dev,
+						&max77693_charger_desc,
+						&psy_cfg);
+	if (IS_ERR(chg->charger)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
+		ret = PTR_ERR(chg->charger);
 		goto err;
 	}
 
@@ -727,7 +744,7 @@
 	device_remove_file(&pdev->dev, &dev_attr_top_off_threshold_current);
 	device_remove_file(&pdev->dev, &dev_attr_fast_charge_timer);
 
-	power_supply_unregister(&chg->charger);
+	power_supply_unregister(chg->charger);
 
 	return 0;
 }
diff --git a/drivers/power/max8903_charger.c b/drivers/power/max8903_charger.c
index 99e3cdc..bf2b4b3 100644
--- a/drivers/power/max8903_charger.c
+++ b/drivers/power/max8903_charger.c
@@ -31,7 +31,8 @@
 struct max8903_data {
 	struct max8903_pdata pdata;
 	struct device *dev;
-	struct power_supply psy;
+	struct power_supply *psy;
+	struct power_supply_desc psy_desc;
 	bool fault;
 	bool usb_in;
 	bool ta_in;
@@ -47,8 +48,7 @@
 		enum power_supply_property psp,
 		union power_supply_propval *val)
 {
-	struct max8903_data *data = container_of(psy,
-			struct max8903_data, psy);
+	struct max8903_data *data = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -104,17 +104,17 @@
 	dev_dbg(data->dev, "TA(DC-IN) Charger %s.\n", ta_in ?
 			"Connected" : "Disconnected");
 
-	old_type = data->psy.type;
+	old_type = data->psy_desc.type;
 
 	if (data->ta_in)
-		data->psy.type = POWER_SUPPLY_TYPE_MAINS;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_MAINS;
 	else if (data->usb_in)
-		data->psy.type = POWER_SUPPLY_TYPE_USB;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_USB;
 	else
-		data->psy.type = POWER_SUPPLY_TYPE_BATTERY;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_BATTERY;
 
-	if (old_type != data->psy.type)
-		power_supply_changed(&data->psy);
+	if (old_type != data->psy_desc.type)
+		power_supply_changed(data->psy);
 
 	return IRQ_HANDLED;
 }
@@ -143,17 +143,17 @@
 	dev_dbg(data->dev, "USB Charger %s.\n", usb_in ?
 			"Connected" : "Disconnected");
 
-	old_type = data->psy.type;
+	old_type = data->psy_desc.type;
 
 	if (data->ta_in)
-		data->psy.type = POWER_SUPPLY_TYPE_MAINS;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_MAINS;
 	else if (data->usb_in)
-		data->psy.type = POWER_SUPPLY_TYPE_USB;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_USB;
 	else
-		data->psy.type = POWER_SUPPLY_TYPE_BATTERY;
+		data->psy_desc.type = POWER_SUPPLY_TYPE_BATTERY;
 
-	if (old_type != data->psy.type)
-		power_supply_changed(&data->psy);
+	if (old_type != data->psy_desc.type)
+		power_supply_changed(data->psy);
 
 	return IRQ_HANDLED;
 }
@@ -184,6 +184,7 @@
 	struct max8903_data *data;
 	struct device *dev = &pdev->dev;
 	struct max8903_pdata *pdata = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	int ret = 0;
 	int gpio;
 	int ta_in = 0;
@@ -280,17 +281,20 @@
 	data->ta_in = ta_in;
 	data->usb_in = usb_in;
 
-	data->psy.name = "max8903_charger";
-	data->psy.type = (ta_in) ? POWER_SUPPLY_TYPE_MAINS :
+	data->psy_desc.name = "max8903_charger";
+	data->psy_desc.type = (ta_in) ? POWER_SUPPLY_TYPE_MAINS :
 			((usb_in) ? POWER_SUPPLY_TYPE_USB :
 			 POWER_SUPPLY_TYPE_BATTERY);
-	data->psy.get_property = max8903_get_property;
-	data->psy.properties = max8903_charger_props;
-	data->psy.num_properties = ARRAY_SIZE(max8903_charger_props);
+	data->psy_desc.get_property = max8903_get_property;
+	data->psy_desc.properties = max8903_charger_props;
+	data->psy_desc.num_properties = ARRAY_SIZE(max8903_charger_props);
 
-	ret = power_supply_register(dev, &data->psy);
-	if (ret) {
+	psy_cfg.drv_data = data;
+
+	data->psy = power_supply_register(dev, &data->psy_desc, &psy_cfg);
+	if (IS_ERR(data->psy)) {
 		dev_err(dev, "failed: power supply register.\n");
+		ret = PTR_ERR(data->psy);
 		goto err;
 	}
 
@@ -339,7 +343,7 @@
 	if (pdata->dc_valid)
 		free_irq(gpio_to_irq(pdata->dok), data);
 err_psy:
-	power_supply_unregister(&data->psy);
+	power_supply_unregister(data->psy);
 err:
 	return ret;
 }
@@ -357,7 +361,7 @@
 			free_irq(gpio_to_irq(pdata->uok), data);
 		if (pdata->dc_valid)
 			free_irq(gpio_to_irq(pdata->dok), data);
-		power_supply_unregister(&data->psy);
+		power_supply_unregister(data->psy);
 	}
 
 	return 0;
diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index a6d45ee..57eb5c2 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -68,9 +68,9 @@
 	struct i2c_client	*gpm;
 	struct i2c_client	*adc;
 
-	struct power_supply	ac;
-	struct power_supply	usb;
-	struct power_supply	battery;
+	struct power_supply	*ac;
+	struct power_supply	*usb;
+	struct power_supply	*battery;
 	int			irq_base;
 	unsigned		ac_online:1;
 	unsigned		usb_online:1;
@@ -196,7 +196,7 @@
 			       enum power_supply_property psp,
 			       union power_supply_propval *val)
 {
-	struct max8925_power_info *info = dev_get_drvdata(psy->dev->parent);
+	struct max8925_power_info *info = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -230,7 +230,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct max8925_power_info *info = dev_get_drvdata(psy->dev->parent);
+	struct max8925_power_info *info = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -264,7 +264,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct max8925_power_info *info = dev_get_drvdata(psy->dev->parent);
+	struct max8925_power_info *info = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -347,6 +347,30 @@
 	POWER_SUPPLY_PROP_STATUS,
 };
 
+static const struct power_supply_desc ac_desc = {
+	.name		= "max8925-ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= max8925_ac_props,
+	.num_properties	= ARRAY_SIZE(max8925_ac_props),
+	.get_property	= max8925_ac_get_prop,
+};
+
+static const struct power_supply_desc usb_desc = {
+	.name		= "max8925-usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= max8925_usb_props,
+	.num_properties	= ARRAY_SIZE(max8925_usb_props),
+	.get_property	= max8925_usb_get_prop,
+};
+
+static const struct power_supply_desc battery_desc = {
+	.name		= "max8925-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= max8925_battery_props,
+	.num_properties	= ARRAY_SIZE(max8925_battery_props),
+	.get_property	= max8925_bat_get_prop,
+};
+
 #define REQUEST_IRQ(_irq, _name)					\
 do {									\
 	ret = request_threaded_irq(chip->irq_base + _irq, NULL,		\
@@ -482,6 +506,7 @@
 static int max8925_power_probe(struct platform_device *pdev)
 {
 	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
+	struct power_supply_config psy_cfg = {}; /* Only for ac and usb */
 	struct max8925_power_pdata *pdata = NULL;
 	struct max8925_power_info *info;
 	int ret;
@@ -502,40 +527,29 @@
 	info->adc = chip->adc;
 	platform_set_drvdata(pdev, info);
 
-	info->ac.name = "max8925-ac";
-	info->ac.type = POWER_SUPPLY_TYPE_MAINS;
-	info->ac.properties = max8925_ac_props;
-	info->ac.num_properties = ARRAY_SIZE(max8925_ac_props);
-	info->ac.get_property = max8925_ac_get_prop;
-	info->ac.supplied_to = pdata->supplied_to;
-	info->ac.num_supplicants = pdata->num_supplicants;
-	ret = power_supply_register(&pdev->dev, &info->ac);
-	if (ret)
+	psy_cfg.supplied_to = pdata->supplied_to;
+	psy_cfg.num_supplicants = pdata->num_supplicants;
+
+	info->ac = power_supply_register(&pdev->dev, &ac_desc, &psy_cfg);
+	if (IS_ERR(info->ac)) {
+		ret = PTR_ERR(info->ac);
 		goto out;
-	info->ac.dev->parent = &pdev->dev;
+	}
+	info->ac->dev.parent = &pdev->dev;
 
-	info->usb.name = "max8925-usb";
-	info->usb.type = POWER_SUPPLY_TYPE_USB;
-	info->usb.properties = max8925_usb_props;
-	info->usb.num_properties = ARRAY_SIZE(max8925_usb_props);
-	info->usb.get_property = max8925_usb_get_prop;
-	info->usb.supplied_to = pdata->supplied_to;
-	info->usb.num_supplicants = pdata->num_supplicants;
-
-	ret = power_supply_register(&pdev->dev, &info->usb);
-	if (ret)
+	info->usb = power_supply_register(&pdev->dev, &usb_desc, &psy_cfg);
+	if (IS_ERR(info->usb)) {
+		ret = PTR_ERR(info->usb);
 		goto out_usb;
-	info->usb.dev->parent = &pdev->dev;
+	}
+	info->usb->dev.parent = &pdev->dev;
 
-	info->battery.name = "max8925-battery";
-	info->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	info->battery.properties = max8925_battery_props;
-	info->battery.num_properties = ARRAY_SIZE(max8925_battery_props);
-	info->battery.get_property = max8925_bat_get_prop;
-	ret = power_supply_register(&pdev->dev, &info->battery);
-	if (ret)
+	info->battery = power_supply_register(&pdev->dev, &battery_desc, NULL);
+	if (IS_ERR(info->battery)) {
+		ret = PTR_ERR(info->battery);
 		goto out_battery;
-	info->battery.dev->parent = &pdev->dev;
+	}
+	info->battery->dev.parent = &pdev->dev;
 
 	info->batt_detect = pdata->batt_detect;
 	info->topoff_threshold = pdata->topoff_threshold;
@@ -547,9 +561,9 @@
 	max8925_init_charger(chip, info);
 	return 0;
 out_battery:
-	power_supply_unregister(&info->battery);
+	power_supply_unregister(info->battery);
 out_usb:
-	power_supply_unregister(&info->ac);
+	power_supply_unregister(info->ac);
 out:
 	return ret;
 }
@@ -559,9 +573,9 @@
 	struct max8925_power_info *info = platform_get_drvdata(pdev);
 
 	if (info) {
-		power_supply_unregister(&info->ac);
-		power_supply_unregister(&info->usb);
-		power_supply_unregister(&info->battery);
+		power_supply_unregister(info->ac);
+		power_supply_unregister(info->usb);
+		power_supply_unregister(info->battery);
 		max8925_deinit_charger(info);
 	}
 	return 0;
diff --git a/drivers/power/max8997_charger.c b/drivers/power/max8997_charger.c
index aefa0c9..0b2eab5 100644
--- a/drivers/power/max8997_charger.c
+++ b/drivers/power/max8997_charger.c
@@ -30,7 +30,7 @@
 struct charger_data {
 	struct device *dev;
 	struct max8997_dev *iodev;
-	struct power_supply battery;
+	struct power_supply *battery;
 };
 
 static enum power_supply_property max8997_battery_props[] = {
@@ -44,8 +44,7 @@
 		enum power_supply_property psp,
 		union power_supply_propval *val)
 {
-	struct charger_data *charger = container_of(psy,
-			struct charger_data, battery);
+	struct charger_data *charger = power_supply_get_drvdata(psy);
 	struct i2c_client *i2c = charger->iodev->i2c;
 	int ret;
 	u8 reg;
@@ -86,12 +85,21 @@
 	return 0;
 }
 
+static const struct power_supply_desc max8997_battery_desc = {
+	.name		= "max8997_pmic",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= max8997_battery_get_property,
+	.properties	= max8997_battery_props,
+	.num_properties	= ARRAY_SIZE(max8997_battery_props),
+};
+
 static int max8997_battery_probe(struct platform_device *pdev)
 {
 	int ret = 0;
 	struct charger_data *charger;
 	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
 	struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct power_supply_config psy_cfg = {};
 
 	if (!pdata)
 		return -EINVAL;
@@ -147,19 +155,18 @@
 
 	platform_set_drvdata(pdev, charger);
 
-	charger->battery.name = "max8997_pmic";
-	charger->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	charger->battery.get_property = max8997_battery_get_property;
-	charger->battery.properties = max8997_battery_props;
-	charger->battery.num_properties = ARRAY_SIZE(max8997_battery_props);
 
 	charger->dev = &pdev->dev;
 	charger->iodev = iodev;
 
-	ret = power_supply_register(&pdev->dev, &charger->battery);
-	if (ret) {
+	psy_cfg.drv_data = charger;
+
+	charger->battery = power_supply_register(&pdev->dev,
+						 &max8997_battery_desc,
+						 &psy_cfg);
+	if (IS_ERR(charger->battery)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
-		return ret;
+		return PTR_ERR(charger->battery);
 	}
 
 	return 0;
@@ -169,7 +176,7 @@
 {
 	struct charger_data *charger = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&charger->battery);
+	power_supply_unregister(charger->battery);
 	return 0;
 }
 
diff --git a/drivers/power/max8998_charger.c b/drivers/power/max8998_charger.c
index 08694c7..47448d4 100644
--- a/drivers/power/max8998_charger.c
+++ b/drivers/power/max8998_charger.c
@@ -30,7 +30,7 @@
 struct max8998_battery_data {
 	struct device *dev;
 	struct max8998_dev *iodev;
-	struct power_supply battery;
+	struct power_supply *battery;
 };
 
 static enum power_supply_property max8998_battery_props[] = {
@@ -43,8 +43,7 @@
 		enum power_supply_property psp,
 		union power_supply_propval *val)
 {
-	struct max8998_battery_data *max8998 = container_of(psy,
-			struct max8998_battery_data, battery);
+	struct max8998_battery_data *max8998 = power_supply_get_drvdata(psy);
 	struct i2c_client *i2c = max8998->iodev->i2c;
 	int ret;
 	u8 reg;
@@ -75,10 +74,19 @@
 	return 0;
 }
 
+static const struct power_supply_desc max8998_battery_desc = {
+	.name		= "max8998_pmic",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= max8998_battery_get_property,
+	.properties	= max8998_battery_props,
+	.num_properties	= ARRAY_SIZE(max8998_battery_props),
+};
+
 static int max8998_battery_probe(struct platform_device *pdev)
 {
 	struct max8998_dev *iodev = dev_get_drvdata(pdev->dev.parent);
 	struct max8998_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct power_supply_config psy_cfg = {};
 	struct max8998_battery_data *max8998;
 	struct i2c_client *i2c;
 	int ret = 0;
@@ -161,15 +169,15 @@
 		goto err;
 	}
 
-	max8998->battery.name = "max8998_pmic";
-	max8998->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	max8998->battery.get_property = max8998_battery_get_property;
-	max8998->battery.properties = max8998_battery_props;
-	max8998->battery.num_properties = ARRAY_SIZE(max8998_battery_props);
+	psy_cfg.drv_data = max8998;
 
-	ret = power_supply_register(max8998->dev, &max8998->battery);
-	if (ret) {
-		dev_err(max8998->dev, "failed: power supply register\n");
+	max8998->battery = power_supply_register(max8998->dev,
+						 &max8998_battery_desc,
+						 &psy_cfg);
+	if (IS_ERR(max8998->battery)) {
+		ret = PTR_ERR(max8998->battery);
+		dev_err(max8998->dev, "failed: power supply register: %d\n",
+			ret);
 		goto err;
 	}
 
@@ -182,7 +190,7 @@
 {
 	struct max8998_battery_data *max8998 = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&max8998->battery);
+	power_supply_unregister(max8998->battery);
 
 	return 0;
 }
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index ad9cde7..a944338 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -81,7 +81,7 @@
 	POWER_SUPPLY_PROP_ONLINE,
 };
 
-static struct power_supply olpc_ac = {
+static const struct power_supply_desc olpc_ac_desc = {
 	.name = "olpc-ac",
 	.type = POWER_SUPPLY_TYPE_MAINS,
 	.properties = olpc_ac_props,
@@ -89,6 +89,8 @@
 	.get_property = olpc_ac_get_prop,
 };
 
+static struct power_supply *olpc_ac;
+
 static char bat_serial[17]; /* Ick */
 
 static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte)
@@ -574,21 +576,23 @@
  *		Initialisation
  *********************************************************************/
 
-static struct power_supply olpc_bat = {
+static struct power_supply_desc olpc_bat_desc = {
 	.name = "olpc-battery",
 	.get_property = olpc_bat_get_property,
 	.use_for_apm = 1,
 };
 
+static struct power_supply *olpc_bat;
+
 static int olpc_battery_suspend(struct platform_device *pdev,
 				pm_message_t state)
 {
-	if (device_may_wakeup(olpc_ac.dev))
+	if (device_may_wakeup(&olpc_ac->dev))
 		olpc_ec_wakeup_set(EC_SCI_SRC_ACPWR);
 	else
 		olpc_ec_wakeup_clear(EC_SCI_SRC_ACPWR);
 
-	if (device_may_wakeup(olpc_bat.dev))
+	if (device_may_wakeup(&olpc_bat->dev))
 		olpc_ec_wakeup_set(EC_SCI_SRC_BATTERY | EC_SCI_SRC_BATSOC
 				   | EC_SCI_SRC_BATERR);
 	else
@@ -619,52 +623,54 @@
 
 	/* Ignore the status. It doesn't actually matter */
 
-	ret = power_supply_register(&pdev->dev, &olpc_ac);
-	if (ret)
-		return ret;
+	olpc_ac = power_supply_register(&pdev->dev, &olpc_ac_desc, NULL);
+	if (IS_ERR(olpc_ac))
+		return PTR_ERR(olpc_ac);
 
 	if (olpc_board_at_least(olpc_board_pre(0xd0))) { /* XO-1.5 */
-		olpc_bat.properties = olpc_xo15_bat_props;
-		olpc_bat.num_properties = ARRAY_SIZE(olpc_xo15_bat_props);
+		olpc_bat_desc.properties = olpc_xo15_bat_props;
+		olpc_bat_desc.num_properties = ARRAY_SIZE(olpc_xo15_bat_props);
 	} else { /* XO-1 */
-		olpc_bat.properties = olpc_xo1_bat_props;
-		olpc_bat.num_properties = ARRAY_SIZE(olpc_xo1_bat_props);
+		olpc_bat_desc.properties = olpc_xo1_bat_props;
+		olpc_bat_desc.num_properties = ARRAY_SIZE(olpc_xo1_bat_props);
 	}
 
-	ret = power_supply_register(&pdev->dev, &olpc_bat);
-	if (ret)
+	olpc_bat = power_supply_register(&pdev->dev, &olpc_bat_desc, NULL);
+	if (IS_ERR(olpc_bat)) {
+		ret = PTR_ERR(olpc_bat);
 		goto battery_failed;
+	}
 
-	ret = device_create_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
+	ret = device_create_bin_file(&olpc_bat->dev, &olpc_bat_eeprom);
 	if (ret)
 		goto eeprom_failed;
 
-	ret = device_create_file(olpc_bat.dev, &olpc_bat_error);
+	ret = device_create_file(&olpc_bat->dev, &olpc_bat_error);
 	if (ret)
 		goto error_failed;
 
 	if (olpc_ec_wakeup_available()) {
-		device_set_wakeup_capable(olpc_ac.dev, true);
-		device_set_wakeup_capable(olpc_bat.dev, true);
+		device_set_wakeup_capable(&olpc_ac->dev, true);
+		device_set_wakeup_capable(&olpc_bat->dev, true);
 	}
 
 	return 0;
 
 error_failed:
-	device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
+	device_remove_bin_file(&olpc_bat->dev, &olpc_bat_eeprom);
 eeprom_failed:
-	power_supply_unregister(&olpc_bat);
+	power_supply_unregister(olpc_bat);
 battery_failed:
-	power_supply_unregister(&olpc_ac);
+	power_supply_unregister(olpc_ac);
 	return ret;
 }
 
 static int olpc_battery_remove(struct platform_device *pdev)
 {
-	device_remove_file(olpc_bat.dev, &olpc_bat_error);
-	device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
-	power_supply_unregister(&olpc_bat);
-	power_supply_unregister(&olpc_ac);
+	device_remove_file(&olpc_bat->dev, &olpc_bat_error);
+	device_remove_bin_file(&olpc_bat->dev, &olpc_bat_eeprom);
+	power_supply_unregister(olpc_bat);
+	power_supply_unregister(olpc_ac);
 	return 0;
 }
 
diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c
index 771c4f0..d05597b 100644
--- a/drivers/power/pcf50633-charger.c
+++ b/drivers/power/pcf50633-charger.c
@@ -33,9 +33,9 @@
 	int adapter_online;
 	int usb_online;
 
-	struct power_supply usb;
-	struct power_supply adapter;
-	struct power_supply ac;
+	struct power_supply *usb;
+	struct power_supply *adapter;
+	struct power_supply *ac;
 };
 
 int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma)
@@ -104,7 +104,7 @@
 				PCF50633_MBCC1_CHGENA, PCF50633_MBCC1_CHGENA);
 	}
 
-	power_supply_changed(&mbc->usb);
+	power_supply_changed(mbc->usb);
 
 	return ret;
 }
@@ -278,9 +278,9 @@
 	else if (irq == PCF50633_IRQ_ADPREM)
 		mbc->adapter_online = 0;
 
-	power_supply_changed(&mbc->ac);
-	power_supply_changed(&mbc->usb);
-	power_supply_changed(&mbc->adapter);
+	power_supply_changed(mbc->ac);
+	power_supply_changed(mbc->usb);
+	power_supply_changed(mbc->adapter);
 
 	if (mbc->pcf->pdata->mbc_event_callback)
 		mbc->pcf->pdata->mbc_event_callback(mbc->pcf, irq);
@@ -290,8 +290,7 @@
 			enum power_supply_property psp,
 			union power_supply_propval *val)
 {
-	struct pcf50633_mbc *mbc = container_of(psy,
-				struct pcf50633_mbc, adapter);
+	struct pcf50633_mbc *mbc = power_supply_get_drvdata(psy);
 	int ret = 0;
 
 	switch (psp) {
@@ -309,7 +308,7 @@
 			enum power_supply_property psp,
 			union power_supply_propval *val)
 {
-	struct pcf50633_mbc *mbc = container_of(psy, struct pcf50633_mbc, usb);
+	struct pcf50633_mbc *mbc = power_supply_get_drvdata(psy);
 	int ret = 0;
 	u8 usblim = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCC7) &
 						PCF50633_MBCC7_USB_MASK;
@@ -330,7 +329,7 @@
 			enum power_supply_property psp,
 			union power_supply_propval *val)
 {
-	struct pcf50633_mbc *mbc = container_of(psy, struct pcf50633_mbc, ac);
+	struct pcf50633_mbc *mbc = power_supply_get_drvdata(psy);
 	int ret = 0;
 	u8 usblim = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCC7) &
 						PCF50633_MBCC7_USB_MASK;
@@ -366,8 +365,33 @@
 	PCF50633_IRQ_LOWBAT,
 };
 
+static const struct power_supply_desc pcf50633_mbc_adapter_desc = {
+	.name		= "adapter",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= power_props,
+	.num_properties	= ARRAY_SIZE(power_props),
+	.get_property	= &adapter_get_property,
+};
+
+static const struct power_supply_desc pcf50633_mbc_usb_desc = {
+	.name		= "usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= power_props,
+	.num_properties	= ARRAY_SIZE(power_props),
+	.get_property	= usb_get_property,
+};
+
+static const struct power_supply_desc pcf50633_mbc_ac_desc = {
+	.name		= "ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= power_props,
+	.num_properties	= ARRAY_SIZE(power_props),
+	.get_property	= ac_get_property,
+};
+
 static int pcf50633_mbc_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	struct pcf50633_mbc *mbc;
 	int ret;
 	int i;
@@ -385,49 +409,36 @@
 		pcf50633_register_irq(mbc->pcf, mbc_irq_handlers[i],
 					pcf50633_mbc_irq_handler, mbc);
 
+	psy_cfg.supplied_to		= mbc->pcf->pdata->batteries;
+	psy_cfg.num_supplicants		= mbc->pcf->pdata->num_batteries;
+	psy_cfg.drv_data		= mbc;
+
 	/* Create power supplies */
-	mbc->adapter.name		= "adapter";
-	mbc->adapter.type		= POWER_SUPPLY_TYPE_MAINS;
-	mbc->adapter.properties		= power_props;
-	mbc->adapter.num_properties	= ARRAY_SIZE(power_props);
-	mbc->adapter.get_property	= &adapter_get_property;
-	mbc->adapter.supplied_to	= mbc->pcf->pdata->batteries;
-	mbc->adapter.num_supplicants	= mbc->pcf->pdata->num_batteries;
-
-	mbc->usb.name			= "usb";
-	mbc->usb.type			= POWER_SUPPLY_TYPE_USB;
-	mbc->usb.properties		= power_props;
-	mbc->usb.num_properties		= ARRAY_SIZE(power_props);
-	mbc->usb.get_property		= usb_get_property;
-	mbc->usb.supplied_to		= mbc->pcf->pdata->batteries;
-	mbc->usb.num_supplicants	= mbc->pcf->pdata->num_batteries;
-
-	mbc->ac.name			= "ac";
-	mbc->ac.type			= POWER_SUPPLY_TYPE_MAINS;
-	mbc->ac.properties		= power_props;
-	mbc->ac.num_properties		= ARRAY_SIZE(power_props);
-	mbc->ac.get_property		= ac_get_property;
-	mbc->ac.supplied_to		= mbc->pcf->pdata->batteries;
-	mbc->ac.num_supplicants		= mbc->pcf->pdata->num_batteries;
-
-	ret = power_supply_register(&pdev->dev, &mbc->adapter);
-	if (ret) {
+	mbc->adapter = power_supply_register(&pdev->dev,
+					     &pcf50633_mbc_adapter_desc,
+					     &psy_cfg);
+	if (IS_ERR(mbc->adapter)) {
 		dev_err(mbc->pcf->dev, "failed to register adapter\n");
+		ret = PTR_ERR(mbc->adapter);
 		return ret;
 	}
 
-	ret = power_supply_register(&pdev->dev, &mbc->usb);
-	if (ret) {
+	mbc->usb = power_supply_register(&pdev->dev, &pcf50633_mbc_usb_desc,
+					 &psy_cfg);
+	if (IS_ERR(mbc->usb)) {
 		dev_err(mbc->pcf->dev, "failed to register usb\n");
-		power_supply_unregister(&mbc->adapter);
+		power_supply_unregister(mbc->adapter);
+		ret = PTR_ERR(mbc->usb);
 		return ret;
 	}
 
-	ret = power_supply_register(&pdev->dev, &mbc->ac);
-	if (ret) {
+	mbc->ac = power_supply_register(&pdev->dev, &pcf50633_mbc_ac_desc,
+					&psy_cfg);
+	if (IS_ERR(mbc->ac)) {
 		dev_err(mbc->pcf->dev, "failed to register ac\n");
-		power_supply_unregister(&mbc->adapter);
-		power_supply_unregister(&mbc->usb);
+		power_supply_unregister(mbc->adapter);
+		power_supply_unregister(mbc->usb);
+		ret = PTR_ERR(mbc->ac);
 		return ret;
 	}
 
@@ -454,9 +465,9 @@
 		pcf50633_free_irq(mbc->pcf, mbc_irq_handlers[i]);
 
 	sysfs_remove_group(&pdev->dev.kobj, &mbc_attr_group);
-	power_supply_unregister(&mbc->usb);
-	power_supply_unregister(&mbc->adapter);
-	power_supply_unregister(&mbc->ac);
+	power_supply_unregister(mbc->usb);
+	power_supply_unregister(mbc->adapter);
+	power_supply_unregister(mbc->ac);
 
 	return 0;
 }
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 0c52e2a..dfe1ee8 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -34,6 +34,7 @@
 static struct timer_list supply_timer;
 static struct timer_list polling_timer;
 static int polling;
+static struct power_supply *pda_psy_ac, *pda_psy_usb;
 
 #if IS_ENABLED(CONFIG_USB_PHY)
 static struct usb_phy *transceiver;
@@ -58,7 +59,7 @@
 {
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ONLINE:
-		if (psy->type == POWER_SUPPLY_TYPE_MAINS)
+		if (psy->desc->type == POWER_SUPPLY_TYPE_MAINS)
 			val->intval = pdata->is_ac_online ?
 				      pdata->is_ac_online() : 0;
 		else
@@ -80,21 +81,17 @@
 	"backup-battery",
 };
 
-static struct power_supply pda_psy_ac = {
+static const struct power_supply_desc pda_psy_ac_desc = {
 	.name = "ac",
 	.type = POWER_SUPPLY_TYPE_MAINS,
-	.supplied_to = pda_power_supplied_to,
-	.num_supplicants = ARRAY_SIZE(pda_power_supplied_to),
 	.properties = pda_power_props,
 	.num_properties = ARRAY_SIZE(pda_power_props),
 	.get_property = pda_power_get_property,
 };
 
-static struct power_supply pda_psy_usb = {
+static const struct power_supply_desc pda_psy_usb_desc = {
 	.name = "usb",
 	.type = POWER_SUPPLY_TYPE_USB,
-	.supplied_to = pda_power_supplied_to,
-	.num_supplicants = ARRAY_SIZE(pda_power_supplied_to),
 	.properties = pda_power_props,
 	.num_properties = ARRAY_SIZE(pda_power_props),
 	.get_property = pda_power_get_property,
@@ -147,12 +144,12 @@
 {
 	if (ac_status == PDA_PSY_TO_CHANGE) {
 		ac_status = new_ac_status;
-		power_supply_changed(&pda_psy_ac);
+		power_supply_changed(pda_psy_ac);
 	}
 
 	if (usb_status == PDA_PSY_TO_CHANGE) {
 		usb_status = new_usb_status;
-		power_supply_changed(&pda_psy_usb);
+		power_supply_changed(pda_psy_usb);
 	}
 }
 
@@ -176,9 +173,9 @@
 
 static irqreturn_t power_changed_isr(int irq, void *power_supply)
 {
-	if (power_supply == &pda_psy_ac)
+	if (power_supply == pda_psy_ac)
 		ac_status = PDA_PSY_TO_CHANGE;
-	else if (power_supply == &pda_psy_usb)
+	else if (power_supply == pda_psy_usb)
 		usb_status = PDA_PSY_TO_CHANGE;
 	else
 		return IRQ_NONE;
@@ -262,6 +259,7 @@
 
 static int pda_power_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	int ret = 0;
 
 	dev = &pdev->dev;
@@ -309,10 +307,11 @@
 	usb_irq = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "usb");
 
 	if (pdata->supplied_to) {
-		pda_psy_ac.supplied_to = pdata->supplied_to;
-		pda_psy_ac.num_supplicants = pdata->num_supplicants;
-		pda_psy_usb.supplied_to = pdata->supplied_to;
-		pda_psy_usb.num_supplicants = pdata->num_supplicants;
+		psy_cfg.supplied_to = pdata->supplied_to;
+		psy_cfg.num_supplicants = pdata->num_supplicants;
+	} else {
+		psy_cfg.supplied_to = pda_power_supplied_to;
+		psy_cfg.num_supplicants = ARRAY_SIZE(pda_power_supplied_to);
 	}
 
 #if IS_ENABLED(CONFIG_USB_PHY)
@@ -326,17 +325,19 @@
 #endif
 
 	if (pdata->is_ac_online) {
-		ret = power_supply_register(&pdev->dev, &pda_psy_ac);
-		if (ret) {
+		pda_psy_ac = power_supply_register(&pdev->dev,
+						   &pda_psy_ac_desc, &psy_cfg);
+		if (IS_ERR(pda_psy_ac)) {
 			dev_err(dev, "failed to register %s power supply\n",
-				pda_psy_ac.name);
+				pda_psy_ac_desc.name);
+			ret = PTR_ERR(pda_psy_ac);
 			goto ac_supply_failed;
 		}
 
 		if (ac_irq) {
 			ret = request_irq(ac_irq->start, power_changed_isr,
 					  get_irq_flags(ac_irq), ac_irq->name,
-					  &pda_psy_ac);
+					  pda_psy_ac);
 			if (ret) {
 				dev_err(dev, "request ac irq failed\n");
 				goto ac_irq_failed;
@@ -347,17 +348,20 @@
 	}
 
 	if (pdata->is_usb_online) {
-		ret = power_supply_register(&pdev->dev, &pda_psy_usb);
-		if (ret) {
+		pda_psy_usb = power_supply_register(&pdev->dev,
+						    &pda_psy_usb_desc,
+						    &psy_cfg);
+		if (IS_ERR(pda_psy_usb)) {
 			dev_err(dev, "failed to register %s power supply\n",
-				pda_psy_usb.name);
+				pda_psy_usb_desc.name);
+			ret = PTR_ERR(pda_psy_usb);
 			goto usb_supply_failed;
 		}
 
 		if (usb_irq) {
 			ret = request_irq(usb_irq->start, power_changed_isr,
 					  get_irq_flags(usb_irq),
-					  usb_irq->name, &pda_psy_usb);
+					  usb_irq->name, pda_psy_usb);
 			if (ret) {
 				dev_err(dev, "request usb irq failed\n");
 				goto usb_irq_failed;
@@ -394,21 +398,21 @@
 #if IS_ENABLED(CONFIG_USB_PHY)
 otg_reg_notifier_failed:
 	if (pdata->is_usb_online && usb_irq)
-		free_irq(usb_irq->start, &pda_psy_usb);
+		free_irq(usb_irq->start, pda_psy_usb);
 #endif
 usb_irq_failed:
 	if (pdata->is_usb_online)
-		power_supply_unregister(&pda_psy_usb);
+		power_supply_unregister(pda_psy_usb);
 usb_supply_failed:
 	if (pdata->is_ac_online && ac_irq)
-		free_irq(ac_irq->start, &pda_psy_ac);
+		free_irq(ac_irq->start, pda_psy_ac);
 #if IS_ENABLED(CONFIG_USB_PHY)
 	if (!IS_ERR_OR_NULL(transceiver))
 		usb_put_phy(transceiver);
 #endif
 ac_irq_failed:
 	if (pdata->is_ac_online)
-		power_supply_unregister(&pda_psy_ac);
+		power_supply_unregister(pda_psy_ac);
 ac_supply_failed:
 	if (ac_draw) {
 		regulator_put(ac_draw);
@@ -424,9 +428,9 @@
 static int pda_power_remove(struct platform_device *pdev)
 {
 	if (pdata->is_usb_online && usb_irq)
-		free_irq(usb_irq->start, &pda_psy_usb);
+		free_irq(usb_irq->start, pda_psy_usb);
 	if (pdata->is_ac_online && ac_irq)
-		free_irq(ac_irq->start, &pda_psy_ac);
+		free_irq(ac_irq->start, pda_psy_ac);
 
 	if (polling)
 		del_timer_sync(&polling_timer);
@@ -434,9 +438,9 @@
 	del_timer_sync(&supply_timer);
 
 	if (pdata->is_usb_online)
-		power_supply_unregister(&pda_psy_usb);
+		power_supply_unregister(pda_psy_usb);
 	if (pdata->is_ac_online)
-		power_supply_unregister(&pda_psy_ac);
+		power_supply_unregister(pda_psy_ac);
 #if IS_ENABLED(CONFIG_USB_PHY)
 	if (!IS_ERR_OR_NULL(transceiver))
 		usb_put_phy(transceiver);
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 7773249..cc0893f 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -216,7 +216,7 @@
 {
 	dev_err(pm2->dev, "Overvoltage detected\n");
 	pm2->flags.ovv = true;
-	power_supply_changed(&pm2->ac_chg.psy);
+	power_supply_changed(pm2->ac_chg.psy);
 
 	/* Schedule a new HW failure check */
 	queue_delayed_work(pm2->charger_wq, &pm2->check_hw_failure_work, 0);
@@ -229,7 +229,7 @@
 	dev_dbg(pm2->dev , "20 minutes watchdog expired\n");
 
 	pm2->ac.wd_expired = true;
-	power_supply_changed(&pm2->ac_chg.psy);
+	power_supply_changed(pm2->ac_chg.psy);
 
 	return 0;
 }
@@ -573,7 +573,7 @@
 	struct pm2xxx_charger *pm2;
 	u8 val;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_MAINS)
 		pm2 = to_pm2xxx_charger_ac_device_info(charger);
 	else
 		return -ENXIO;
@@ -816,7 +816,7 @@
 
 		dev_dbg(pm2->dev, "PM2301: " "Disabled AC charging\n");
 	}
-	power_supply_changed(&pm2->ac_chg.psy);
+	power_supply_changed(pm2->ac_chg.psy);
 
 error_occured:
 	return ret;
@@ -827,7 +827,7 @@
 	int ret;
 	struct pm2xxx_charger *pm2;
 
-	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+	if (charger->psy->desc->type == POWER_SUPPLY_TYPE_MAINS)
 		pm2 = to_pm2xxx_charger_ac_device_info(charger);
 	else
 		return -ENXIO;
@@ -845,8 +845,8 @@
 		struct pm2xxx_charger, ac_work);
 
 
-	power_supply_changed(&pm2->ac_chg.psy);
-	sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+	power_supply_changed(pm2->ac_chg.psy);
+	sysfs_notify(&pm2->ac_chg.psy->dev.kobj, NULL, "present");
 };
 
 static void pm2xxx_charger_check_hw_failure_work(struct work_struct *work)
@@ -862,7 +862,7 @@
 		if (!(reg_value & (PM2XXX_INT4_S_ITVPWR1OVV |
 					PM2XXX_INT4_S_ITVPWR2OVV))) {
 			pm2->flags.ovv = false;
-			power_supply_changed(&pm2->ac_chg.psy);
+			power_supply_changed(pm2->ac_chg.psy);
 		}
 	}
 
@@ -895,7 +895,7 @@
 				| PM2XXX_INT5_S_ITTHERMALSHUTDOWNFALL))
 		pm2->flags.main_thermal_prot = false;
 
-	power_supply_changed(&pm2->ac_chg.psy);
+	power_supply_changed(pm2->ac_chg.psy);
 }
 
 static struct pm2xxx_interrupts pm2xxx_int = {
@@ -989,6 +989,7 @@
 		const struct i2c_device_id *id)
 {
 	struct pm2xxx_platform_data *pl_data = i2c_client->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	struct pm2xxx_charger *pm2;
 	int ret = 0;
 	u8 val;
@@ -1042,13 +1043,14 @@
 
 	/* AC supply */
 	/* power_supply base class */
-	pm2->ac_chg.psy.name = pm2->pdata->label;
-	pm2->ac_chg.psy.type = POWER_SUPPLY_TYPE_MAINS;
-	pm2->ac_chg.psy.properties = pm2xxx_charger_ac_props;
-	pm2->ac_chg.psy.num_properties = ARRAY_SIZE(pm2xxx_charger_ac_props);
-	pm2->ac_chg.psy.get_property = pm2xxx_charger_ac_get_property;
-	pm2->ac_chg.psy.supplied_to = pm2->pdata->supplied_to;
-	pm2->ac_chg.psy.num_supplicants = pm2->pdata->num_supplicants;
+	pm2->ac_chg_desc.name = pm2->pdata->label;
+	pm2->ac_chg_desc.type = POWER_SUPPLY_TYPE_MAINS;
+	pm2->ac_chg_desc.properties = pm2xxx_charger_ac_props;
+	pm2->ac_chg_desc.num_properties = ARRAY_SIZE(pm2xxx_charger_ac_props);
+	pm2->ac_chg_desc.get_property = pm2xxx_charger_ac_get_property;
+
+	psy_cfg.supplied_to = pm2->pdata->supplied_to;
+	psy_cfg.num_supplicants = pm2->pdata->num_supplicants;
 	/* pm2xxx_charger sub-class */
 	pm2->ac_chg.ops.enable = &pm2xxx_charger_ac_en;
 	pm2->ac_chg.ops.kick_wd = &pm2xxx_charger_watchdog_kick;
@@ -1093,9 +1095,11 @@
 	}
 
 	/* Register AC charger class */
-	ret = power_supply_register(pm2->dev, &pm2->ac_chg.psy);
-	if (ret) {
+	pm2->ac_chg.psy = power_supply_register(pm2->dev, &pm2->ac_chg_desc,
+						&psy_cfg);
+	if (IS_ERR(pm2->ac_chg.psy)) {
 		dev_err(pm2->dev, "failed to register AC charger\n");
+		ret = PTR_ERR(pm2->ac_chg.psy);
 		goto free_regulator;
 	}
 
@@ -1167,8 +1171,8 @@
 		ab8500_override_turn_on_stat(~AB8500_POW_KEY_1_ON,
 					     AB8500_MAIN_CH_DET);
 		pm2->ac_conn = true;
-		power_supply_changed(&pm2->ac_chg.psy);
-		sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+		power_supply_changed(pm2->ac_chg.psy);
+		sysfs_notify(&pm2->ac_chg.psy->dev.kobj, NULL, "present");
 	}
 
 	return 0;
@@ -1183,7 +1187,7 @@
 	free_irq(gpio_to_irq(pm2->pdata->gpio_irq_number), pm2);
 unregister_pm2xxx_charger:
 	/* unregister power supply */
-	power_supply_unregister(&pm2->ac_chg.psy);
+	power_supply_unregister(pm2->ac_chg.psy);
 free_regulator:
 	/* disable the regulator */
 	regulator_put(pm2->regu);
@@ -1218,7 +1222,7 @@
 	/* disable the regulator */
 	regulator_put(pm2->regu);
 
-	power_supply_unregister(&pm2->ac_chg.psy);
+	power_supply_unregister(pm2->ac_chg.psy);
 
 	if (gpio_is_valid(pm2->lpn_pin))
 		gpio_free(pm2->lpn_pin);
diff --git a/drivers/power/pm2301_charger.h b/drivers/power/pm2301_charger.h
index 8ce3cc0..24181cf 100644
--- a/drivers/power/pm2301_charger.h
+++ b/drivers/power/pm2301_charger.h
@@ -486,6 +486,7 @@
 	struct work_struct check_main_thermal_prot_work;
 	struct delayed_work check_hw_failure_work;
 	struct ux500_charger ac_chg;
+	struct power_supply_desc ac_chg_desc;
 	struct pm2xxx_charger_event_flags flags;
 };
 
diff --git a/drivers/power/pmu_battery.c b/drivers/power/pmu_battery.c
index 023d2499..9c8d525 100644
--- a/drivers/power/pmu_battery.c
+++ b/drivers/power/pmu_battery.c
@@ -17,13 +17,14 @@
 #include <linux/slab.h>
 
 static struct pmu_battery_dev {
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct pmu_battery_info *pbi;
 	char name[16];
 	int propval;
 } *pbats[PMU_MAX_BATTERIES];
 
-#define to_pmu_battery_dev(x) container_of(x, struct pmu_battery_dev, bat)
+#define to_pmu_battery_dev(x) power_supply_get_drvdata(x)
 
 /*********************************************************************
  *		Power
@@ -49,7 +50,7 @@
 	POWER_SUPPLY_PROP_ONLINE,
 };
 
-static struct power_supply pmu_ac = {
+static const struct power_supply_desc pmu_ac_desc = {
 	.name = "pmu-ac",
 	.type = POWER_SUPPLY_TYPE_MAINS,
 	.properties = pmu_ac_props,
@@ -57,6 +58,8 @@
 	.get_property = pmu_get_ac_prop,
 };
 
+static struct power_supply *pmu_ac;
+
 /*********************************************************************
  *		Battery properties
  *********************************************************************/
@@ -142,7 +145,7 @@
 
 static int __init pmu_bat_init(void)
 {
-	int ret;
+	int ret = 0;
 	int i;
 
 	bat_pdev = platform_device_register_simple("pmu-battery",
@@ -152,25 +155,32 @@
 		goto pdev_register_failed;
 	}
 
-	ret = power_supply_register(&bat_pdev->dev, &pmu_ac);
-	if (ret)
+	pmu_ac = power_supply_register(&bat_pdev->dev, &pmu_ac_desc, NULL);
+	if (IS_ERR(pmu_ac)) {
+		ret = PTR_ERR(pmu_ac);
 		goto ac_register_failed;
+	}
 
 	for (i = 0; i < pmu_battery_count; i++) {
+		struct power_supply_config psy_cfg = {};
 		struct pmu_battery_dev *pbat = kzalloc(sizeof(*pbat),
 						       GFP_KERNEL);
 		if (!pbat)
 			break;
 
 		sprintf(pbat->name, "PMU_battery_%d", i);
-		pbat->bat.name = pbat->name;
-		pbat->bat.properties = pmu_bat_props;
-		pbat->bat.num_properties = ARRAY_SIZE(pmu_bat_props);
-		pbat->bat.get_property = pmu_bat_get_property;
+		pbat->bat_desc.name = pbat->name;
+		pbat->bat_desc.properties = pmu_bat_props;
+		pbat->bat_desc.num_properties = ARRAY_SIZE(pmu_bat_props);
+		pbat->bat_desc.get_property = pmu_bat_get_property;
 		pbat->pbi = &pmu_batteries[i];
+		psy_cfg.drv_data = pbat;
 
-		ret = power_supply_register(&bat_pdev->dev, &pbat->bat);
-		if (ret) {
+		pbat->bat = power_supply_register(&bat_pdev->dev,
+						  &pbat->bat_desc,
+						  &psy_cfg);
+		if (IS_ERR(pbat->bat)) {
+			ret = PTR_ERR(pbat->bat);
 			kfree(pbat);
 			goto battery_register_failed;
 		}
@@ -183,10 +193,10 @@
 	while (i--) {
 		if (!pbats[i])
 			continue;
-		power_supply_unregister(&pbats[i]->bat);
+		power_supply_unregister(pbats[i]->bat);
 		kfree(pbats[i]);
 	}
-	power_supply_unregister(&pmu_ac);
+	power_supply_unregister(pmu_ac);
 ac_register_failed:
 	platform_device_unregister(bat_pdev);
 pdev_register_failed:
@@ -201,10 +211,10 @@
 	for (i = 0; i < PMU_MAX_BATTERIES; i++) {
 		if (!pbats[i])
 			continue;
-		power_supply_unregister(&pbats[i]->bat);
+		power_supply_unregister(pbats[i]->bat);
 		kfree(pbats[i]);
 	}
-	power_supply_unregister(&pmu_ac);
+	power_supply_unregister(pmu_ac);
 	platform_device_unregister(bat_pdev);
 }
 
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 694e8cd..2ed4a4a 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -40,16 +40,16 @@
 
 	/* Support both supplied_to and supplied_from modes */
 	if (supply->supplied_from) {
-		if (!supplier->name)
+		if (!supplier->desc->name)
 			return false;
 		for (i = 0; i < supply->num_supplies; i++)
-			if (!strcmp(supplier->name, supply->supplied_from[i]))
+			if (!strcmp(supplier->desc->name, supply->supplied_from[i]))
 				return true;
 	} else {
-		if (!supply->name)
+		if (!supply->desc->name)
 			return false;
 		for (i = 0; i < supplier->num_supplicants; i++)
-			if (!strcmp(supplier->supplied_to[i], supply->name))
+			if (!strcmp(supplier->supplied_to[i], supply->desc->name))
 				return true;
 	}
 
@@ -62,8 +62,8 @@
 	struct power_supply *pst = dev_get_drvdata(dev);
 
 	if (__power_supply_is_supplied_by(psy, pst)) {
-		if (pst->external_power_changed)
-			pst->external_power_changed(pst);
+		if (pst->desc->external_power_changed)
+			pst->desc->external_power_changed(pst);
 	}
 
 	return 0;
@@ -75,7 +75,7 @@
 	struct power_supply *psy = container_of(work, struct power_supply,
 						changed_work);
 
-	dev_dbg(psy->dev, "%s\n", __func__);
+	dev_dbg(&psy->dev, "%s\n", __func__);
 
 	spin_lock_irqsave(&psy->changed_lock, flags);
 	/*
@@ -93,7 +93,7 @@
 		power_supply_update_leds(psy);
 		atomic_notifier_call_chain(&power_supply_notifier,
 				PSY_EVENT_PROP_CHANGED, psy);
-		kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
+		kobject_uevent(&psy->dev.kobj, KOBJ_CHANGE);
 		spin_lock_irqsave(&psy->changed_lock, flags);
 	}
 
@@ -103,7 +103,7 @@
 	 * to true.
 	 */
 	if (likely(!psy->changed))
-		pm_relax(psy->dev);
+		pm_relax(&psy->dev);
 	spin_unlock_irqrestore(&psy->changed_lock, flags);
 }
 
@@ -111,11 +111,11 @@
 {
 	unsigned long flags;
 
-	dev_dbg(psy->dev, "%s\n", __func__);
+	dev_dbg(&psy->dev, "%s\n", __func__);
 
 	spin_lock_irqsave(&psy->changed_lock, flags);
 	psy->changed = true;
-	pm_stay_awake(psy->dev);
+	pm_stay_awake(&psy->dev);
 	spin_unlock_irqrestore(&psy->changed_lock, flags);
 	schedule_work(&psy->changed_work);
 }
@@ -138,9 +138,9 @@
 			break;
 
 		if (np == epsy->of_node) {
-			dev_info(psy->dev, "%s: Found supply : %s\n",
-				psy->name, epsy->name);
-			psy->supplied_from[i-1] = (char *)epsy->name;
+			dev_info(&psy->dev, "%s: Found supply : %s\n",
+				psy->desc->name, epsy->desc->name);
+			psy->supplied_from[i-1] = (char *)epsy->desc->name;
 			psy->num_supplies++;
 			of_node_put(np);
 			break;
@@ -158,7 +158,7 @@
 	error = class_for_each_device(power_supply_class, NULL, psy,
 				      __power_supply_populate_supplied_from);
 
-	dev_dbg(psy->dev, "%s %d\n", __func__, error);
+	dev_dbg(&psy->dev, "%s %d\n", __func__, error);
 
 	return error;
 }
@@ -220,7 +220,7 @@
 		of_node_put(np);
 
 		if (ret) {
-			dev_dbg(psy->dev, "Failed to find supply!\n");
+			dev_dbg(&psy->dev, "Failed to find supply!\n");
 			return ret;
 		}
 	} while (np);
@@ -230,17 +230,18 @@
 		return 0;
 
 	/* All supplies found, allocate char ** array for filling */
-	psy->supplied_from = devm_kzalloc(psy->dev, sizeof(psy->supplied_from),
+	psy->supplied_from = devm_kzalloc(&psy->dev, sizeof(psy->supplied_from),
 					  GFP_KERNEL);
 	if (!psy->supplied_from) {
-		dev_err(psy->dev, "Couldn't allocate memory for supply list\n");
+		dev_err(&psy->dev, "Couldn't allocate memory for supply list\n");
 		return -ENOMEM;
 	}
 
-	*psy->supplied_from = devm_kzalloc(psy->dev, sizeof(char *) * (cnt - 1),
+	*psy->supplied_from = devm_kzalloc(&psy->dev,
+					   sizeof(char *) * (cnt - 1),
 					   GFP_KERNEL);
 	if (!*psy->supplied_from) {
-		dev_err(psy->dev, "Couldn't allocate memory for supply list\n");
+		dev_err(&psy->dev, "Couldn't allocate memory for supply list\n");
 		return -ENOMEM;
 	}
 
@@ -260,7 +261,8 @@
 	struct power_supply *epsy = dev_get_drvdata(dev);
 
 	if (__power_supply_is_supplied_by(epsy, psy))
-		if (!epsy->get_property(epsy, POWER_SUPPLY_PROP_ONLINE, &ret))
+		if (!epsy->desc->get_property(epsy, POWER_SUPPLY_PROP_ONLINE,
+					&ret))
 			return ret.intval;
 
 	return 0;
@@ -273,7 +275,7 @@
 	error = class_for_each_device(power_supply_class, NULL, psy,
 				      __power_supply_am_i_supplied);
 
-	dev_dbg(psy->dev, "%s %d\n", __func__, error);
+	dev_dbg(&psy->dev, "%s %d\n", __func__, error);
 
 	return error;
 }
@@ -286,8 +288,9 @@
 	unsigned int *count = data;
 
 	(*count)++;
-	if (psy->type != POWER_SUPPLY_TYPE_BATTERY)
-		if (!psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &ret))
+	if (psy->desc->type != POWER_SUPPLY_TYPE_BATTERY)
+		if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE,
+					&ret))
 			return ret.intval;
 
 	return 0;
@@ -314,8 +317,10 @@
 
 int power_supply_set_battery_charged(struct power_supply *psy)
 {
-	if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) {
-		psy->set_charged(psy);
+	if (atomic_read(&psy->use_cnt) >= 0 &&
+			psy->desc->type == POWER_SUPPLY_TYPE_BATTERY &&
+			psy->desc->set_charged) {
+		psy->desc->set_charged(psy);
 		return 0;
 	}
 
@@ -328,28 +333,74 @@
 	const char *name = data;
 	struct power_supply *psy = dev_get_drvdata(dev);
 
-	return strcmp(psy->name, name) == 0;
+	return strcmp(psy->desc->name, name) == 0;
 }
 
+/**
+ * power_supply_get_by_name() - Search for a power supply and returns its ref
+ * @name: Power supply name to fetch
+ *
+ * If power supply was found, it increases reference count for the
+ * internal power supply's device. The user should power_supply_put()
+ * after usage.
+ *
+ * Return: On success returns a reference to a power supply with
+ * matching name equals to @name, a NULL otherwise.
+ */
 struct power_supply *power_supply_get_by_name(const char *name)
 {
+	struct power_supply *psy = NULL;
 	struct device *dev = class_find_device(power_supply_class, NULL, name,
 					power_supply_match_device_by_name);
 
-	return dev ? dev_get_drvdata(dev) : NULL;
+	if (dev) {
+		psy = dev_get_drvdata(dev);
+		atomic_inc(&psy->use_cnt);
+	}
+
+	return psy;
 }
 EXPORT_SYMBOL_GPL(power_supply_get_by_name);
 
+/**
+ * power_supply_put() - Drop reference obtained with power_supply_get_by_name
+ * @psy: Reference to put
+ *
+ * The reference to power supply should be put before unregistering
+ * the power supply.
+ */
+void power_supply_put(struct power_supply *psy)
+{
+	might_sleep();
+
+	atomic_dec(&psy->use_cnt);
+	put_device(&psy->dev);
+}
+EXPORT_SYMBOL_GPL(power_supply_put);
+
 #ifdef CONFIG_OF
 static int power_supply_match_device_node(struct device *dev, const void *data)
 {
 	return dev->parent && dev->parent->of_node == data;
 }
 
+/**
+ * power_supply_get_by_phandle() - Search for a power supply and returns its ref
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a power supply name
+ *
+ * If power supply was found, it increases reference count for the
+ * internal power supply's device. The user should power_supply_put()
+ * after usage.
+ *
+ * Return: On success returns a reference to a power supply with
+ * matching name equals to value under @property, NULL or ERR_PTR otherwise.
+ */
 struct power_supply *power_supply_get_by_phandle(struct device_node *np,
 							const char *property)
 {
 	struct device_node *power_supply_np;
+	struct power_supply *psy = NULL;
 	struct device *dev;
 
 	power_supply_np = of_parse_phandle(np, property, 0);
@@ -361,21 +412,70 @@
 
 	of_node_put(power_supply_np);
 
-	return dev ? dev_get_drvdata(dev) : NULL;
+	if (dev) {
+		psy = dev_get_drvdata(dev);
+		atomic_inc(&psy->use_cnt);
+	}
+
+	return psy;
 }
 EXPORT_SYMBOL_GPL(power_supply_get_by_phandle);
 #endif /* CONFIG_OF */
 
+int power_supply_get_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    union power_supply_propval *val)
+{
+	if (atomic_read(&psy->use_cnt) <= 0)
+		return -ENODEV;
+
+	return psy->desc->get_property(psy, psp, val);
+}
+EXPORT_SYMBOL_GPL(power_supply_get_property);
+
+int power_supply_set_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    const union power_supply_propval *val)
+{
+	if (atomic_read(&psy->use_cnt) <= 0 || !psy->desc->set_property)
+		return -ENODEV;
+
+	return psy->desc->set_property(psy, psp, val);
+}
+EXPORT_SYMBOL_GPL(power_supply_set_property);
+
+int power_supply_property_is_writeable(struct power_supply *psy,
+					enum power_supply_property psp)
+{
+	if (atomic_read(&psy->use_cnt) <= 0 ||
+			!psy->desc->property_is_writeable)
+		return -ENODEV;
+
+	return psy->desc->property_is_writeable(psy, psp);
+}
+EXPORT_SYMBOL_GPL(power_supply_property_is_writeable);
+
+void power_supply_external_power_changed(struct power_supply *psy)
+{
+	if (atomic_read(&psy->use_cnt) <= 0 ||
+			!psy->desc->external_power_changed)
+		return;
+
+	psy->desc->external_power_changed(psy);
+}
+EXPORT_SYMBOL_GPL(power_supply_external_power_changed);
+
 int power_supply_powers(struct power_supply *psy, struct device *dev)
 {
-	return sysfs_create_link(&psy->dev->kobj, &dev->kobj, "powers");
+	return sysfs_create_link(&psy->dev.kobj, &dev->kobj, "powers");
 }
 EXPORT_SYMBOL_GPL(power_supply_powers);
 
 static void power_supply_dev_release(struct device *dev)
 {
+	struct power_supply *psy = container_of(dev, struct power_supply, dev);
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
-	kfree(dev);
+	kfree(psy);
 }
 
 int power_supply_reg_notifier(struct notifier_block *nb)
@@ -400,7 +500,7 @@
 
 	WARN_ON(tzd == NULL);
 	psy = tzd->devdata;
-	ret = psy->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
 
 	/* Convert tenths of degree Celsius to milli degree Celsius. */
 	if (!ret)
@@ -417,14 +517,14 @@
 {
 	int i;
 
-	if (psy->no_thermal)
+	if (psy->desc->no_thermal)
 		return 0;
 
 	/* Register battery zone device psy reports temperature */
-	for (i = 0; i < psy->num_properties; i++) {
-		if (psy->properties[i] == POWER_SUPPLY_PROP_TEMP) {
-			psy->tzd = thermal_zone_device_register(psy->name, 0, 0,
-					psy, &psy_tzd_ops, NULL, 0, 0);
+	for (i = 0; i < psy->desc->num_properties; i++) {
+		if (psy->desc->properties[i] == POWER_SUPPLY_PROP_TEMP) {
+			psy->tzd = thermal_zone_device_register(psy->desc->name,
+					0, 0, psy, &psy_tzd_ops, NULL, 0, 0);
 			return PTR_ERR_OR_ZERO(psy->tzd);
 		}
 	}
@@ -447,7 +547,7 @@
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->get_property(psy,
+	ret = psy->desc->get_property(psy,
 		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
 	if (!ret)
 		*state = val.intval;
@@ -463,7 +563,7 @@
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->get_property(psy,
+	ret = psy->desc->get_property(psy,
 		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
 	if (!ret)
 		*state = val.intval;
@@ -480,7 +580,7 @@
 
 	psy = tcd->devdata;
 	val.intval = state;
-	ret = psy->set_property(psy,
+	ret = psy->desc->set_property(psy,
 		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
 
 	return ret;
@@ -497,11 +597,11 @@
 	int i;
 
 	/* Register for cooling device if psy can control charging */
-	for (i = 0; i < psy->num_properties; i++) {
-		if (psy->properties[i] ==
+	for (i = 0; i < psy->desc->num_properties; i++) {
+		if (psy->desc->properties[i] ==
 				POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT) {
 			psy->tcd = thermal_cooling_device_register(
-							(char *)psy->name,
+							(char *)psy->desc->name,
 							psy, &psy_tcd_ops);
 			return PTR_ERR_OR_ZERO(psy->tcd);
 		}
@@ -535,15 +635,21 @@
 }
 #endif
 
-static int __power_supply_register(struct device *parent,
-				   struct power_supply *psy, bool ws)
+static struct power_supply *__must_check
+__power_supply_register(struct device *parent,
+				   const struct power_supply_desc *desc,
+				   const struct power_supply_config *cfg,
+				   bool ws)
 {
 	struct device *dev;
+	struct power_supply *psy;
 	int rc;
 
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev)
-		return -ENOMEM;
+	psy = kzalloc(sizeof(*psy), GFP_KERNEL);
+	if (!psy)
+		return ERR_PTR(-ENOMEM);
+
+	dev = &psy->dev;
 
 	device_initialize(dev);
 
@@ -552,9 +658,16 @@
 	dev->parent = parent;
 	dev->release = power_supply_dev_release;
 	dev_set_drvdata(dev, psy);
-	psy->dev = dev;
+	psy->desc = desc;
+	atomic_inc(&psy->use_cnt);
+	if (cfg) {
+		psy->drv_data = cfg->drv_data;
+		psy->of_node = cfg->of_node;
+		psy->supplied_to = cfg->supplied_to;
+		psy->num_supplicants = cfg->num_supplicants;
+	}
 
-	rc = dev_set_name(dev, "%s", psy->name);
+	rc = dev_set_name(dev, "%s", desc->name);
 	if (rc)
 		goto dev_set_name_failed;
 
@@ -589,7 +702,7 @@
 
 	power_supply_changed(psy);
 
-	return 0;
+	return psy;
 
 create_triggers_failed:
 	psy_unregister_cooler(psy);
@@ -602,33 +715,155 @@
 check_supplies_failed:
 dev_set_name_failed:
 	put_device(dev);
-	return rc;
+	return ERR_PTR(rc);
 }
 
-int power_supply_register(struct device *parent, struct power_supply *psy)
+/**
+ * power_supply_register() - Register new power supply
+ * @parent:	Device to be a parent of power supply's device
+ * @desc:	Description of power supply, must be valid through whole
+ *		lifetime of this power supply
+ * @cfg:	Run-time specific configuration accessed during registering,
+ *		may be NULL
+ *
+ * Return: A pointer to newly allocated power_supply on success
+ * or ERR_PTR otherwise.
+ * Use power_supply_unregister() on returned power_supply pointer to release
+ * resources.
+ */
+struct power_supply *__must_check power_supply_register(struct device *parent,
+		const struct power_supply_desc *desc,
+		const struct power_supply_config *cfg)
 {
-	return __power_supply_register(parent, psy, true);
+	return __power_supply_register(parent, desc, cfg, true);
 }
 EXPORT_SYMBOL_GPL(power_supply_register);
 
-int power_supply_register_no_ws(struct device *parent, struct power_supply *psy)
+/**
+ * power_supply_register() - Register new non-waking-source power supply
+ * @parent:	Device to be a parent of power supply's device
+ * @desc:	Description of power supply, must be valid through whole
+ *		lifetime of this power supply
+ * @cfg:	Run-time specific configuration accessed during registering,
+ *		may be NULL
+ *
+ * Return: A pointer to newly allocated power_supply on success
+ * or ERR_PTR otherwise.
+ * Use power_supply_unregister() on returned power_supply pointer to release
+ * resources.
+ */
+struct power_supply *__must_check
+power_supply_register_no_ws(struct device *parent,
+		const struct power_supply_desc *desc,
+		const struct power_supply_config *cfg)
 {
-	return __power_supply_register(parent, psy, false);
+	return __power_supply_register(parent, desc, cfg, false);
 }
 EXPORT_SYMBOL_GPL(power_supply_register_no_ws);
 
+static void devm_power_supply_release(struct device *dev, void *res)
+{
+	struct power_supply **psy = res;
+
+	power_supply_unregister(*psy);
+}
+
+/**
+ * power_supply_register() - Register managed power supply
+ * @parent:	Device to be a parent of power supply's device
+ * @desc:	Description of power supply, must be valid through whole
+ *		lifetime of this power supply
+ * @cfg:	Run-time specific configuration accessed during registering,
+ *		may be NULL
+ *
+ * Return: A pointer to newly allocated power_supply on success
+ * or ERR_PTR otherwise.
+ * The returned power_supply pointer will be automatically unregistered
+ * on driver detach.
+ */
+struct power_supply *__must_check
+devm_power_supply_register(struct device *parent,
+		const struct power_supply_desc *desc,
+		const struct power_supply_config *cfg)
+{
+	struct power_supply **ptr, *psy;
+
+	ptr = devres_alloc(devm_power_supply_release, sizeof(*ptr), GFP_KERNEL);
+
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+	psy = __power_supply_register(parent, desc, cfg, true);
+	if (IS_ERR(psy)) {
+		devres_free(ptr);
+	} else {
+		*ptr = psy;
+		devres_add(parent, ptr);
+	}
+	return psy;
+}
+EXPORT_SYMBOL_GPL(devm_power_supply_register);
+
+/**
+ * power_supply_register() - Register managed non-waking-source power supply
+ * @parent:	Device to be a parent of power supply's device
+ * @desc:	Description of power supply, must be valid through whole
+ *		lifetime of this power supply
+ * @cfg:	Run-time specific configuration accessed during registering,
+ *		may be NULL
+ *
+ * Return: A pointer to newly allocated power_supply on success
+ * or ERR_PTR otherwise.
+ * The returned power_supply pointer will be automatically unregistered
+ * on driver detach.
+ */
+struct power_supply *__must_check
+devm_power_supply_register_no_ws(struct device *parent,
+		const struct power_supply_desc *desc,
+		const struct power_supply_config *cfg)
+{
+	struct power_supply **ptr, *psy;
+
+	ptr = devres_alloc(devm_power_supply_release, sizeof(*ptr), GFP_KERNEL);
+
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+	psy = __power_supply_register(parent, desc, cfg, false);
+	if (IS_ERR(psy)) {
+		devres_free(ptr);
+	} else {
+		*ptr = psy;
+		devres_add(parent, ptr);
+	}
+	return psy;
+}
+EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws);
+
+/**
+ * power_supply_unregister() - Remove this power supply from system
+ * @psy:	Pointer to power supply to unregister
+ *
+ * Remove this power supply from the system. The resources of power supply
+ * will be freed here or on last power_supply_put() call.
+ */
 void power_supply_unregister(struct power_supply *psy)
 {
+	WARN_ON(atomic_dec_return(&psy->use_cnt));
 	cancel_work_sync(&psy->changed_work);
-	sysfs_remove_link(&psy->dev->kobj, "powers");
+	sysfs_remove_link(&psy->dev.kobj, "powers");
 	power_supply_remove_triggers(psy);
 	psy_unregister_cooler(psy);
 	psy_unregister_thermal(psy);
-	device_init_wakeup(psy->dev, false);
-	device_unregister(psy->dev);
+	device_init_wakeup(&psy->dev, false);
+	device_unregister(&psy->dev);
 }
 EXPORT_SYMBOL_GPL(power_supply_unregister);
 
+void *power_supply_get_drvdata(struct power_supply *psy)
+{
+	return psy->drv_data;
+}
+EXPORT_SYMBOL_GPL(power_supply_get_drvdata);
+
 static int __init power_supply_class_init(void)
 {
 	power_supply_class = class_create(THIS_MODULE, "power_supply");
diff --git a/drivers/power/power_supply_leds.c b/drivers/power/power_supply_leds.c
index effa093..2d41a43 100644
--- a/drivers/power/power_supply_leds.c
+++ b/drivers/power/power_supply_leds.c
@@ -25,10 +25,10 @@
 	unsigned long delay_on = 0;
 	unsigned long delay_off = 0;
 
-	if (psy->get_property(psy, POWER_SUPPLY_PROP_STATUS, &status))
+	if (psy->desc->get_property(psy, POWER_SUPPLY_PROP_STATUS, &status))
 		return;
 
-	dev_dbg(psy->dev, "%s %d\n", __func__, status.intval);
+	dev_dbg(&psy->dev, "%s %d\n", __func__, status.intval);
 
 	switch (status.intval) {
 	case POWER_SUPPLY_STATUS_FULL:
@@ -58,21 +58,21 @@
 static int power_supply_create_bat_triggers(struct power_supply *psy)
 {
 	psy->charging_full_trig_name = kasprintf(GFP_KERNEL,
-					"%s-charging-or-full", psy->name);
+					"%s-charging-or-full", psy->desc->name);
 	if (!psy->charging_full_trig_name)
 		goto charging_full_failed;
 
 	psy->charging_trig_name = kasprintf(GFP_KERNEL,
-					"%s-charging", psy->name);
+					"%s-charging", psy->desc->name);
 	if (!psy->charging_trig_name)
 		goto charging_failed;
 
-	psy->full_trig_name = kasprintf(GFP_KERNEL, "%s-full", psy->name);
+	psy->full_trig_name = kasprintf(GFP_KERNEL, "%s-full", psy->desc->name);
 	if (!psy->full_trig_name)
 		goto full_failed;
 
 	psy->charging_blink_full_solid_trig_name = kasprintf(GFP_KERNEL,
-		"%s-charging-blink-full-solid", psy->name);
+		"%s-charging-blink-full-solid", psy->desc->name);
 	if (!psy->charging_blink_full_solid_trig_name)
 		goto charging_blink_full_solid_failed;
 
@@ -115,10 +115,10 @@
 {
 	union power_supply_propval online;
 
-	if (psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &online))
+	if (psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &online))
 		return;
 
-	dev_dbg(psy->dev, "%s %d\n", __func__, online.intval);
+	dev_dbg(&psy->dev, "%s %d\n", __func__, online.intval);
 
 	if (online.intval)
 		led_trigger_event(psy->online_trig, LED_FULL);
@@ -128,7 +128,8 @@
 
 static int power_supply_create_gen_triggers(struct power_supply *psy)
 {
-	psy->online_trig_name = kasprintf(GFP_KERNEL, "%s-online", psy->name);
+	psy->online_trig_name = kasprintf(GFP_KERNEL, "%s-online",
+					  psy->desc->name);
 	if (!psy->online_trig_name)
 		return -ENOMEM;
 
@@ -147,7 +148,7 @@
 
 void power_supply_update_leds(struct power_supply *psy)
 {
-	if (psy->type == POWER_SUPPLY_TYPE_BATTERY)
+	if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY)
 		power_supply_update_bat_leds(psy);
 	else
 		power_supply_update_gen_leds(psy);
@@ -155,14 +156,14 @@
 
 int power_supply_create_triggers(struct power_supply *psy)
 {
-	if (psy->type == POWER_SUPPLY_TYPE_BATTERY)
+	if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY)
 		return power_supply_create_bat_triggers(psy);
 	return power_supply_create_gen_triggers(psy);
 }
 
 void power_supply_remove_triggers(struct power_supply *psy)
 {
-	if (psy->type == POWER_SUPPLY_TYPE_BATTERY)
+	if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY)
 		power_supply_remove_bat_triggers(psy);
 	else
 		power_supply_remove_gen_triggers(psy);
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 62653f5..9134e3d 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -74,9 +74,9 @@
 	union power_supply_propval value;
 
 	if (off == POWER_SUPPLY_PROP_TYPE) {
-		value.intval = psy->type;
+		value.intval = psy->desc->type;
 	} else {
-		ret = psy->get_property(psy, off, &value);
+		ret = power_supply_get_property(psy, off, &value);
 
 		if (ret < 0) {
 			if (ret == -ENODATA)
@@ -125,7 +125,7 @@
 
 	value.intval = long_val;
 
-	ret = psy->set_property(psy, off, &value);
+	ret = psy->desc->set_property(psy, off, &value);
 	if (ret < 0)
 		return ret;
 
@@ -218,12 +218,12 @@
 	if (attrno == POWER_SUPPLY_PROP_TYPE)
 		return mode;
 
-	for (i = 0; i < psy->num_properties; i++) {
-		int property = psy->properties[i];
+	for (i = 0; i < psy->desc->num_properties; i++) {
+		int property = psy->desc->properties[i];
 
 		if (property == attrno) {
-			if (psy->property_is_writeable &&
-			    psy->property_is_writeable(psy, property) > 0)
+			if (psy->desc->property_is_writeable &&
+			    power_supply_property_is_writeable(psy, property) > 0)
 				mode |= S_IWUSR;
 
 			return mode;
@@ -279,14 +279,14 @@
 
 	dev_dbg(dev, "uevent\n");
 
-	if (!psy || !psy->dev) {
+	if (!psy || !psy->desc) {
 		dev_dbg(dev, "No power supply yet\n");
 		return ret;
 	}
 
-	dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->name);
+	dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name);
 
-	ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->name);
+	ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name);
 	if (ret)
 		return ret;
 
@@ -294,11 +294,11 @@
 	if (!prop_buf)
 		return -ENOMEM;
 
-	for (j = 0; j < psy->num_properties; j++) {
+	for (j = 0; j < psy->desc->num_properties; j++) {
 		struct device_attribute *attr;
 		char *line;
 
-		attr = &power_supply_attrs[psy->properties[j]];
+		attr = &power_supply_attrs[psy->desc->properties[j]];
 
 		ret = power_supply_show_property(dev, attr, prop_buf);
 		if (ret == -ENODEV || ret == -ENODATA) {
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 27f6646..aad9c33 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -151,9 +151,17 @@
 	help
 	  Reboot support for generic SYSCON mapped register reset.
 
+config POWER_RESET_SYSCON_POWEROFF
+	bool "Generic SYSCON regmap poweroff driver"
+	depends on OF
+	select MFD_SYSCON
+	help
+	  Poweroff support for generic SYSCON mapped register poweroff.
+
 config POWER_RESET_RMOBILE
 	tristate "Renesas R-Mobile reset driver"
 	depends on ARCH_RMOBILE || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Reboot support for Renesas R-Mobile and SH-Mobile SoCs.
 
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index 11de15b..dbe06c3 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -17,4 +17,5 @@
 obj-$(CONFIG_POWER_RESET_XGENE) += xgene-reboot.o
 obj-$(CONFIG_POWER_RESET_KEYSTONE) += keystone-reset.o
 obj-$(CONFIG_POWER_RESET_SYSCON) += syscon-reboot.o
+obj-$(CONFIG_POWER_RESET_SYSCON_POWEROFF) += syscon-poweroff.o
 obj-$(CONFIG_POWER_RESET_RMOBILE) += rmobile-reset.o
diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c
index 4b72ea5..9847cfb 100644
--- a/drivers/power/reset/at91-poweroff.c
+++ b/drivers/power/reset/at91-poweroff.c
@@ -140,7 +140,7 @@
 	return 0;
 }
 
-static struct of_device_id at91_poweroff_of_match[] = {
+static const struct of_device_id at91_poweroff_of_match[] = {
 	{ .compatible = "atmel,at91sam9260-shdwc", },
 	{ .compatible = "atmel,at91sam9rl-shdwc", },
 	{ .compatible = "atmel,at91sam9x5-shdwc", },
diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c
index 13584e2..01c7055 100644
--- a/drivers/power/reset/at91-reset.c
+++ b/drivers/power/reset/at91-reset.c
@@ -73,8 +73,8 @@
 		: "r" (at91_ramc_base[0]),
 		  "r" (at91_rstc_base),
 		  "r" (1),
-		  "r" (AT91_SDRAMC_LPCB_POWER_DOWN),
-		  "r" (AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST));
+		  "r" cpu_to_le32(AT91_SDRAMC_LPCB_POWER_DOWN),
+		  "r" cpu_to_le32(AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST));
 
 	return NOTIFY_DONE;
 }
@@ -116,8 +116,8 @@
 		  "r" (at91_ramc_base[1]),
 		  "r" (at91_rstc_base),
 		  "r" (1),
-		  "r" (AT91_DDRSDRC_LPCB_POWER_DOWN),
-		  "r" (AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST)
+		  "r" cpu_to_le32(AT91_DDRSDRC_LPCB_POWER_DOWN),
+		  "r" cpu_to_le32(AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST)
 		: "r0");
 
 	return NOTIFY_DONE;
@@ -152,14 +152,14 @@
 	pr_info("AT91: Starting after %s\n", reason);
 }
 
-static struct of_device_id at91_ramc_of_match[] = {
+static const struct of_device_id at91_ramc_of_match[] = {
 	{ .compatible = "atmel,at91sam9260-sdramc", },
 	{ .compatible = "atmel,at91sam9g45-ddramc", },
 	{ .compatible = "atmel,sama5d3-ddramc", },
 	{ /* sentinel */ }
 };
 
-static struct of_device_id at91_reset_of_match[] = {
+static const struct of_device_id at91_reset_of_match[] = {
 	{ .compatible = "atmel,at91sam9260-rstc", .data = at91sam9260_restart },
 	{ .compatible = "atmel,at91sam9g45-rstc", .data = at91sam9g45_restart },
 	{ /* sentinel */ }
diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c
index 5385460..9ab7f56 100644
--- a/drivers/power/reset/hisi-reboot.c
+++ b/drivers/power/reset/hisi-reboot.c
@@ -64,7 +64,7 @@
 	return err;
 }
 
-static struct of_device_id hisi_reboot_of_match[] = {
+static const struct of_device_id hisi_reboot_of_match[] = {
 	{ .compatible = "hisilicon,sysctrl" },
 	{}
 };
diff --git a/drivers/power/reset/keystone-reset.c b/drivers/power/reset/keystone-reset.c
index faedf16..c70f1bf 100644
--- a/drivers/power/reset/keystone-reset.c
+++ b/drivers/power/reset/keystone-reset.c
@@ -70,7 +70,7 @@
 	.priority = 128,
 };
 
-static struct of_device_id rsctrl_of_match[] = {
+static const struct of_device_id rsctrl_of_match[] = {
 	{.compatible = "ti,keystone-reset", },
 	{},
 };
diff --git a/drivers/power/reset/st-poweroff.c b/drivers/power/reset/st-poweroff.c
index 27383de..a488877 100644
--- a/drivers/power/reset/st-poweroff.c
+++ b/drivers/power/reset/st-poweroff.c
@@ -97,7 +97,7 @@
 	.priority = 192,
 };
 
-static struct of_device_id st_reset_of_match[] = {
+static const struct of_device_id st_reset_of_match[] = {
 	{
 		.compatible = "st,stih415-restart",
 		.data = (void *)&stih415_reset,
diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c
new file mode 100644
index 0000000..5560b0d
--- /dev/null
+++ b/drivers/power/reset/syscon-poweroff.c
@@ -0,0 +1,102 @@
+/*
+ * Generic Syscon Poweroff Driver
+ *
+ * Copyright (c) 2015, National Instruments Corp.
+ * Author: Moritz Fischer <moritz.fischer@ettus.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/notifier.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+
+static struct regmap *map;
+static u32 offset;
+static u32 mask;
+
+void syscon_poweroff(void)
+{
+	/* Issue the poweroff */
+	regmap_write(map, offset, mask);
+
+	mdelay(1000);
+
+	pr_emerg("Unable to poweroff system\n");
+}
+
+static int syscon_poweroff_probe(struct platform_device *pdev)
+{
+	char symname[KSYM_NAME_LEN];
+
+	map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap");
+	if (IS_ERR(map)) {
+		dev_err(&pdev->dev, "unable to get syscon");
+		return PTR_ERR(map);
+	}
+
+	if (of_property_read_u32(pdev->dev.of_node, "offset", &offset)) {
+		dev_err(&pdev->dev, "unable to read 'offset'");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(pdev->dev.of_node, "mask", &mask)) {
+		dev_err(&pdev->dev, "unable to read 'mask'");
+		return -EINVAL;
+	}
+
+	if (pm_power_off) {
+		lookup_symbol_name((ulong)pm_power_off, symname);
+		dev_err(&pdev->dev,
+		"pm_power_off already claimed %p %s",
+		pm_power_off, symname);
+		return -EBUSY;
+	}
+
+	pm_power_off = syscon_poweroff;
+
+	return 0;
+}
+
+static int syscon_poweroff_remove(struct platform_device *pdev)
+{
+	if (pm_power_off == syscon_poweroff)
+		pm_power_off = NULL;
+
+	return 0;
+}
+
+static const struct of_device_id syscon_poweroff_of_match[] = {
+	{ .compatible = "syscon-poweroff" },
+	{}
+};
+
+static struct platform_driver syscon_poweroff_driver = {
+	.probe = syscon_poweroff_probe,
+	.remove = syscon_poweroff_remove,
+	.driver = {
+		.name = "syscon-poweroff",
+		.of_match_table = syscon_poweroff_of_match,
+	},
+};
+
+static int __init syscon_poweroff_register(void)
+{
+	return platform_driver_register(&syscon_poweroff_driver);
+}
+device_initcall(syscon_poweroff_register);
diff --git a/drivers/power/reset/syscon-reboot.c b/drivers/power/reset/syscon-reboot.c
index c4049f4..d3c7d24 100644
--- a/drivers/power/reset/syscon-reboot.c
+++ b/drivers/power/reset/syscon-reboot.c
@@ -76,7 +76,7 @@
 	return err;
 }
 
-static struct of_device_id syscon_reboot_of_match[] = {
+static const struct of_device_id syscon_reboot_of_match[] = {
 	{ .compatible = "syscon-reboot" },
 	{}
 };
diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c
index be12d9b..6a9bf70 100644
--- a/drivers/power/reset/vexpress-poweroff.c
+++ b/drivers/power/reset/vexpress-poweroff.c
@@ -80,7 +80,7 @@
 
 enum vexpress_reset_func { FUNC_RESET, FUNC_SHUTDOWN, FUNC_REBOOT };
 
-static struct of_device_id vexpress_reset_of_match[] = {
+static const struct of_device_id vexpress_reset_of_match[] = {
 	{
 		.compatible = "arm,vexpress-reset",
 		.data = (void *)FUNC_RESET,
diff --git a/drivers/power/reset/xgene-reboot.c b/drivers/power/reset/xgene-reboot.c
index b0e5002..f07e93c 100644
--- a/drivers/power/reset/xgene-reboot.c
+++ b/drivers/power/reset/xgene-reboot.c
@@ -87,7 +87,7 @@
 	return err;
 }
 
-static struct of_device_id xgene_reboot_of_match[] = {
+static const struct of_device_id xgene_reboot_of_match[] = {
 	{ .compatible = "apm,xgene-reboot" },
 	{}
 };
diff --git a/drivers/power/rt5033_battery.c b/drivers/power/rt5033_battery.c
index 7b898f4..a7a6877 100644
--- a/drivers/power/rt5033_battery.c
+++ b/drivers/power/rt5033_battery.c
@@ -72,8 +72,7 @@
 		enum power_supply_property psp,
 		union power_supply_propval *val)
 {
-	struct rt5033_battery *battery = container_of(psy,
-				struct rt5033_battery, psy);
+	struct rt5033_battery *battery = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
@@ -102,16 +101,25 @@
 	POWER_SUPPLY_PROP_CAPACITY,
 };
 
-static struct regmap_config rt5033_battery_regmap_config = {
+static const struct regmap_config rt5033_battery_regmap_config = {
 	.reg_bits	= 8,
 	.val_bits	= 8,
 	.max_register	= RT5033_FUEL_REG_END,
 };
 
+static const struct power_supply_desc rt5033_battery_desc = {
+	.name		= "rt5033-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= rt5033_battery_get_property,
+	.properties	= rt5033_battery_props,
+	.num_properties	= ARRAY_SIZE(rt5033_battery_props),
+};
+
 static int rt5033_battery_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct power_supply_config psy_cfg = {};
 	struct rt5033_battery *battery;
 	u32 ret;
 
@@ -131,16 +139,13 @@
 	}
 
 	i2c_set_clientdata(client, battery);
+	psy_cfg.drv_data = battery;
 
-	battery->psy.name		= "rt5033-battery";
-	battery->psy.type		= POWER_SUPPLY_TYPE_BATTERY;
-	battery->psy.get_property	= rt5033_battery_get_property;
-	battery->psy.properties		= rt5033_battery_props;
-	battery->psy.num_properties	= ARRAY_SIZE(rt5033_battery_props);
-
-	ret = power_supply_register(&client->dev, &battery->psy);
-	if (ret) {
+	battery->psy = power_supply_register(&client->dev,
+					     &rt5033_battery_desc, &psy_cfg);
+	if (IS_ERR(battery->psy)) {
 		dev_err(&client->dev, "Failed to register power supply\n");
+		ret = PTR_ERR(battery->psy);
 		return ret;
 	}
 
@@ -151,7 +156,7 @@
 {
 	struct rt5033_battery *battery = i2c_get_clientdata(client);
 
-	power_supply_unregister(&battery->psy);
+	power_supply_unregister(battery->psy);
 
 	return 0;
 }
diff --git a/drivers/power/rx51_battery.c b/drivers/power/rx51_battery.c
index a01aacb..ac62069 100644
--- a/drivers/power/rx51_battery.c
+++ b/drivers/power/rx51_battery.c
@@ -29,7 +29,8 @@
 
 struct rx51_device_info {
 	struct device *dev;
-	struct power_supply bat;
+	struct power_supply *bat;
+	struct power_supply_desc bat_desc;
 	struct iio_channel *channel_temp;
 	struct iio_channel *channel_bsi;
 	struct iio_channel *channel_vbat;
@@ -161,8 +162,7 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct rx51_device_info *di = container_of((psy),
-				struct rx51_device_info, bat);
+	struct rx51_device_info *di = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
@@ -204,6 +204,7 @@
 
 static int rx51_battery_probe(struct platform_device *pdev)
 {
+	struct power_supply_config psy_cfg = {};
 	struct rx51_device_info *di;
 	int ret;
 
@@ -214,11 +215,13 @@
 	platform_set_drvdata(pdev, di);
 
 	di->dev = &pdev->dev;
-	di->bat.name = dev_name(&pdev->dev);
-	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
-	di->bat.properties = rx51_battery_props;
-	di->bat.num_properties = ARRAY_SIZE(rx51_battery_props);
-	di->bat.get_property = rx51_battery_get_property;
+	di->bat_desc.name = dev_name(&pdev->dev);
+	di->bat_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	di->bat_desc.properties = rx51_battery_props;
+	di->bat_desc.num_properties = ARRAY_SIZE(rx51_battery_props);
+	di->bat_desc.get_property = rx51_battery_get_property;
+
+	psy_cfg.drv_data = di;
 
 	di->channel_temp = iio_channel_get(di->dev, "temp");
 	if (IS_ERR(di->channel_temp)) {
@@ -238,9 +241,11 @@
 		goto error_channel_bsi;
 	}
 
-	ret = power_supply_register(di->dev, &di->bat);
-	if (ret)
+	di->bat = power_supply_register(di->dev, &di->bat_desc, &psy_cfg);
+	if (IS_ERR(di->bat)) {
+		ret = PTR_ERR(di->bat);
 		goto error_channel_vbat;
+	}
 
 	return 0;
 
@@ -259,7 +264,7 @@
 {
 	struct rx51_device_info *di = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&di->bat);
+	power_supply_unregister(di->bat);
 
 	iio_channel_release(di->channel_vbat);
 	iio_channel_release(di->channel_bsi);
diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c
index 5948ce0..0ffe5cd 100644
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c
@@ -28,7 +28,7 @@
 #define JITTER_DELAY			500 /* ms */
 
 struct s3c_adc_bat {
-	struct power_supply		psy;
+	struct power_supply		*psy;
 	struct s3c_adc_client		*client;
 	struct s3c_adc_bat_pdata	*pdata;
 	int				volt_value;
@@ -73,10 +73,10 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct s3c_adc_bat *bat = container_of(psy, struct s3c_adc_bat, psy);
+	struct s3c_adc_bat *bat = power_supply_get_drvdata(psy);
 
 	if (!bat) {
-		dev_err(psy->dev, "%s: no battery infos ?!\n", __func__);
+		dev_err(&psy->dev, "%s: no battery infos ?!\n", __func__);
 		return -EINVAL;
 	}
 
@@ -105,17 +105,17 @@
 	}
 }
 
-static struct s3c_adc_bat backup_bat = {
-	.psy = {
-		.name		= "backup-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= s3c_adc_backup_bat_props,
-		.num_properties = ARRAY_SIZE(s3c_adc_backup_bat_props),
-		.get_property	= s3c_adc_backup_bat_get_property,
-		.use_for_apm	= 1,
-	},
+static const struct power_supply_desc backup_bat_desc = {
+	.name		= "backup-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= s3c_adc_backup_bat_props,
+	.num_properties = ARRAY_SIZE(s3c_adc_backup_bat_props),
+	.get_property	= s3c_adc_backup_bat_get_property,
+	.use_for_apm	= 1,
 };
 
+static struct s3c_adc_bat backup_bat;
+
 static enum power_supply_property s3c_adc_main_bat_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
@@ -141,7 +141,7 @@
 				    enum power_supply_property psp,
 				    union power_supply_propval *val)
 {
-	struct s3c_adc_bat *bat = container_of(psy, struct s3c_adc_bat, psy);
+	struct s3c_adc_bat *bat = power_supply_get_drvdata(psy);
 
 	int new_level;
 	int full_volt;
@@ -149,7 +149,7 @@
 	unsigned int lut_size;
 
 	if (!bat) {
-		dev_err(psy->dev, "no battery infos ?!\n");
+		dev_err(&psy->dev, "no battery infos ?!\n");
 		return -EINVAL;
 	}
 
@@ -232,18 +232,18 @@
 	}
 }
 
-static struct s3c_adc_bat main_bat = {
-	.psy = {
-		.name			= "main-battery",
-		.type			= POWER_SUPPLY_TYPE_BATTERY,
-		.properties		= s3c_adc_main_bat_props,
-		.num_properties		= ARRAY_SIZE(s3c_adc_main_bat_props),
-		.get_property		= s3c_adc_bat_get_property,
-		.external_power_changed = s3c_adc_bat_ext_power_changed,
-		.use_for_apm		= 1,
-	},
+static const struct power_supply_desc main_bat_desc = {
+	.name			= "main-battery",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= s3c_adc_main_bat_props,
+	.num_properties		= ARRAY_SIZE(s3c_adc_main_bat_props),
+	.get_property		= s3c_adc_bat_get_property,
+	.external_power_changed = s3c_adc_bat_ext_power_changed,
+	.use_for_apm		= 1,
 };
 
+static struct s3c_adc_bat main_bat;
+
 static void s3c_adc_bat_work(struct work_struct *work)
 {
 	struct s3c_adc_bat *bat = &main_bat;
@@ -251,7 +251,7 @@
 	int is_plugged;
 	static int was_plugged;
 
-	is_plugged = power_supply_am_i_supplied(&bat->psy);
+	is_plugged = power_supply_am_i_supplied(bat->psy);
 	bat->cable_plugged = is_plugged;
 	if (is_plugged != was_plugged) {
 		was_plugged = is_plugged;
@@ -279,7 +279,7 @@
 		}
 	}
 
-	power_supply_changed(&bat->psy);
+	power_supply_changed(bat->psy);
 }
 
 static irqreturn_t s3c_adc_bat_charged(int irq, void *dev_id)
@@ -310,16 +310,25 @@
 	main_bat.cable_plugged = 0;
 	main_bat.status = POWER_SUPPLY_STATUS_DISCHARGING;
 
-	ret = power_supply_register(&pdev->dev, &main_bat.psy);
-	if (ret)
+	main_bat.psy = power_supply_register(&pdev->dev, &main_bat_desc, NULL);
+	if (IS_ERR(main_bat.psy)) {
+		ret = PTR_ERR(main_bat.psy);
 		goto err_reg_main;
+	}
 	if (pdata->backup_volt_mult) {
+		const struct power_supply_config psy_cfg
+						= { .drv_data = &backup_bat, };
+
 		backup_bat.client = client;
 		backup_bat.pdata = pdev->dev.platform_data;
 		backup_bat.volt_value = -1;
-		ret = power_supply_register(&pdev->dev, &backup_bat.psy);
-		if (ret)
+		backup_bat.psy = power_supply_register(&pdev->dev,
+						       &backup_bat_desc,
+						       &psy_cfg);
+		if (IS_ERR(backup_bat.psy)) {
+			ret = PTR_ERR(backup_bat.psy);
 			goto err_reg_backup;
+		}
 	}
 
 	INIT_DELAYED_WORK(&bat_work, s3c_adc_bat_work);
@@ -360,9 +369,9 @@
 		gpio_free(pdata->gpio_charge_finished);
 err_gpio:
 	if (pdata->backup_volt_mult)
-		power_supply_unregister(&backup_bat.psy);
+		power_supply_unregister(backup_bat.psy);
 err_reg_backup:
-	power_supply_unregister(&main_bat.psy);
+	power_supply_unregister(main_bat.psy);
 err_reg_main:
 	return ret;
 }
@@ -372,9 +381,9 @@
 	struct s3c_adc_client *client = platform_get_drvdata(pdev);
 	struct s3c_adc_bat_pdata *pdata = pdev->dev.platform_data;
 
-	power_supply_unregister(&main_bat.psy);
+	power_supply_unregister(main_bat.psy);
 	if (pdata->backup_volt_mult)
-		power_supply_unregister(&backup_bat.psy);
+		power_supply_unregister(backup_bat.psy);
 
 	s3c_adc_release(client);
 
diff --git a/drivers/power/sbs-battery.c b/drivers/power/sbs-battery.c
index c7b7b40..de11786 100644
--- a/drivers/power/sbs-battery.c
+++ b/drivers/power/sbs-battery.c
@@ -156,7 +156,7 @@
 
 struct sbs_info {
 	struct i2c_client		*client;
-	struct power_supply		power_supply;
+	struct power_supply		*power_supply;
 	struct sbs_platform_data	*pdata;
 	bool				is_present;
 	bool				gpio_detect;
@@ -391,7 +391,7 @@
 			chip->last_state = val->intval;
 		else if (chip->last_state != val->intval) {
 			cancel_delayed_work_sync(&chip->work);
-			power_supply_changed(&chip->power_supply);
+			power_supply_changed(chip->power_supply);
 			chip->poll_time = 0;
 		}
 	} else {
@@ -556,8 +556,7 @@
 	union power_supply_propval *val)
 {
 	int ret = 0;
-	struct sbs_info *chip = container_of(psy,
-				struct sbs_info, power_supply);
+	struct sbs_info *chip = power_supply_get_drvdata(psy);
 	struct i2c_client *client = chip->client;
 
 	switch (psp) {
@@ -638,7 +637,7 @@
 	if (!chip->gpio_detect &&
 		chip->is_present != (ret >= 0)) {
 		chip->is_present = (ret >= 0);
-		power_supply_changed(&chip->power_supply);
+		power_supply_changed(chip->power_supply);
 	}
 
 done:
@@ -671,9 +670,7 @@
 
 static void sbs_external_power_changed(struct power_supply *psy)
 {
-	struct sbs_info *chip;
-
-	chip = container_of(psy, struct sbs_info, power_supply);
+	struct sbs_info *chip = power_supply_get_drvdata(psy);
 
 	if (chip->ignore_changes > 0) {
 		chip->ignore_changes--;
@@ -712,7 +709,7 @@
 
 	if (chip->last_state != ret) {
 		chip->poll_time = 0;
-		power_supply_changed(&chip->power_supply);
+		power_supply_changed(chip->power_supply);
 		return;
 	}
 	if (chip->poll_time > 0) {
@@ -796,42 +793,48 @@
 }
 #endif
 
+static const struct power_supply_desc sbs_default_desc = {
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = sbs_properties,
+	.num_properties = ARRAY_SIZE(sbs_properties),
+	.get_property = sbs_get_property,
+	.external_power_changed = sbs_external_power_changed,
+};
+
 static int sbs_probe(struct i2c_client *client,
 	const struct i2c_device_id *id)
 {
 	struct sbs_info *chip;
+	struct power_supply_desc *sbs_desc;
 	struct sbs_platform_data *pdata = client->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 	int rc;
 	int irq;
-	char *name;
 
-	name = kasprintf(GFP_KERNEL, "sbs-%s", dev_name(&client->dev));
-	if (!name) {
-		dev_err(&client->dev, "Failed to allocate device name\n");
+	sbs_desc = devm_kmemdup(&client->dev, &sbs_default_desc,
+			sizeof(*sbs_desc), GFP_KERNEL);
+	if (!sbs_desc)
 		return -ENOMEM;
-	}
+
+	sbs_desc->name = devm_kasprintf(&client->dev, GFP_KERNEL, "sbs-%s",
+			dev_name(&client->dev));
+	if (!sbs_desc->name)
+		return -ENOMEM;
 
 	chip = kzalloc(sizeof(struct sbs_info), GFP_KERNEL);
-	if (!chip) {
-		rc = -ENOMEM;
-		goto exit_free_name;
-	}
+	if (!chip)
+		return -ENOMEM;
 
 	chip->client = client;
 	chip->enable_detection = false;
 	chip->gpio_detect = false;
-	chip->power_supply.name = name;
-	chip->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	chip->power_supply.properties = sbs_properties;
-	chip->power_supply.num_properties = ARRAY_SIZE(sbs_properties);
-	chip->power_supply.get_property = sbs_get_property;
-	chip->power_supply.of_node = client->dev.of_node;
+	psy_cfg.of_node = client->dev.of_node;
+	psy_cfg.drv_data = chip;
 	/* ignore first notification of external change, it is generated
 	 * from the power_supply_register call back
 	 */
 	chip->ignore_changes = 1;
 	chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
-	chip->power_supply.external_power_changed = sbs_external_power_changed;
 
 	pdata = sbs_of_populate_pdata(client);
 
@@ -870,7 +873,7 @@
 
 	rc = request_irq(irq, sbs_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-		dev_name(&client->dev), &chip->power_supply);
+		dev_name(&client->dev), chip->power_supply);
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
 		gpio_free(pdata->battery_detect);
@@ -892,10 +895,12 @@
 		goto exit_psupply;
 	}
 
-	rc = power_supply_register(&client->dev, &chip->power_supply);
-	if (rc) {
+	chip->power_supply = power_supply_register(&client->dev, sbs_desc,
+						   &psy_cfg);
+	if (IS_ERR(chip->power_supply)) {
 		dev_err(&client->dev,
 			"%s: Failed to register power supply\n", __func__);
+		rc = PTR_ERR(chip->power_supply);
 		goto exit_psupply;
 	}
 
@@ -910,15 +915,12 @@
 
 exit_psupply:
 	if (chip->irq)
-		free_irq(chip->irq, &chip->power_supply);
+		free_irq(chip->irq, chip->power_supply);
 	if (chip->gpio_detect)
 		gpio_free(pdata->battery_detect);
 
 	kfree(chip);
 
-exit_free_name:
-	kfree(name);
-
 	return rc;
 }
 
@@ -927,15 +929,14 @@
 	struct sbs_info *chip = i2c_get_clientdata(client);
 
 	if (chip->irq)
-		free_irq(chip->irq, &chip->power_supply);
+		free_irq(chip->irq, chip->power_supply);
 	if (chip->gpio_detect)
 		gpio_free(chip->pdata->battery_detect);
 
-	power_supply_unregister(&chip->power_supply);
+	power_supply_unregister(chip->power_supply);
 
 	cancel_delayed_work_sync(&chip->work);
 
-	kfree(chip->power_supply.name);
 	kfree(chip);
 	chip = NULL;
 
diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c
index acf84e8..0b60a0b 100644
--- a/drivers/power/smb347-charger.c
+++ b/drivers/power/smb347-charger.c
@@ -139,9 +139,9 @@
 	struct mutex		lock;
 	struct device		*dev;
 	struct regmap		*regmap;
-	struct power_supply	mains;
-	struct power_supply	usb;
-	struct power_supply	battery;
+	struct power_supply	*mains;
+	struct power_supply	*usb;
+	struct power_supply	*battery;
 	bool			mains_online;
 	bool			usb_online;
 	bool			charging_enabled;
@@ -741,7 +741,7 @@
 	 */
 	if (stat_c & STAT_C_CHARGER_ERROR) {
 		dev_err(smb->dev, "charging stopped due to charger error\n");
-		power_supply_changed(&smb->battery);
+		power_supply_changed(smb->battery);
 		handled = true;
 	}
 
@@ -752,7 +752,7 @@
 	 */
 	if (irqstat_c & (IRQSTAT_C_TERMINATION_IRQ | IRQSTAT_C_TAPER_IRQ)) {
 		if (irqstat_c & IRQSTAT_C_TERMINATION_STAT)
-			power_supply_changed(&smb->battery);
+			power_supply_changed(smb->battery);
 		dev_dbg(smb->dev, "going to HW maintenance mode\n");
 		handled = true;
 	}
@@ -766,7 +766,7 @@
 
 		if (irqstat_d & IRQSTAT_D_CHARGE_TIMEOUT_STAT)
 			dev_warn(smb->dev, "charging stopped due to timeout\n");
-		power_supply_changed(&smb->battery);
+		power_supply_changed(smb->battery);
 		handled = true;
 	}
 
@@ -778,9 +778,9 @@
 		if (smb347_update_ps_status(smb) > 0) {
 			smb347_start_stop_charging(smb);
 			if (smb->pdata->use_mains)
-				power_supply_changed(&smb->mains);
+				power_supply_changed(smb->mains);
 			if (smb->pdata->use_usb)
-				power_supply_changed(&smb->usb);
+				power_supply_changed(smb->usb);
 		}
 		handled = true;
 	}
@@ -842,7 +842,8 @@
 		goto fail;
 
 	ret = request_threaded_irq(irq, NULL, smb347_interrupt,
-				   IRQF_TRIGGER_FALLING, client->name, smb);
+				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				   client->name, smb);
 	if (ret < 0)
 		goto fail_gpio;
 
@@ -934,8 +935,7 @@
 				     enum power_supply_property prop,
 				     union power_supply_propval *val)
 {
-	struct smb347_charger *smb =
-		container_of(psy, struct smb347_charger, mains);
+	struct smb347_charger *smb = power_supply_get_drvdata(psy);
 	int ret;
 
 	switch (prop) {
@@ -976,8 +976,7 @@
 				   enum power_supply_property prop,
 				   union power_supply_propval *val)
 {
-	struct smb347_charger *smb =
-		container_of(psy, struct smb347_charger, usb);
+	struct smb347_charger *smb = power_supply_get_drvdata(psy);
 	int ret;
 
 	switch (prop) {
@@ -1063,8 +1062,7 @@
 				       enum power_supply_property prop,
 				       union power_supply_propval *val)
 {
-	struct smb347_charger *smb =
-			container_of(psy, struct smb347_charger, battery);
+	struct smb347_charger *smb = power_supply_get_drvdata(psy);
 	const struct smb347_charger_platform_data *pdata = smb->pdata;
 	int ret;
 
@@ -1188,11 +1186,36 @@
 	.readable_reg	= smb347_readable_reg,
 };
 
+static const struct power_supply_desc smb347_mains_desc = {
+	.name		= "smb347-mains",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.get_property	= smb347_mains_get_property,
+	.properties	= smb347_mains_properties,
+	.num_properties	= ARRAY_SIZE(smb347_mains_properties),
+};
+
+static const struct power_supply_desc smb347_usb_desc = {
+	.name		= "smb347-usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.get_property	= smb347_usb_get_property,
+	.properties	= smb347_usb_properties,
+	.num_properties	= ARRAY_SIZE(smb347_usb_properties),
+};
+
+static const struct power_supply_desc smb347_battery_desc = {
+	.name		= "smb347-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.get_property	= smb347_battery_get_property,
+	.properties	= smb347_battery_properties,
+	.num_properties	= ARRAY_SIZE(smb347_battery_properties),
+};
+
 static int smb347_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	static char *battery[] = { "smb347-battery" };
 	const struct smb347_charger_platform_data *pdata;
+	struct power_supply_config mains_usb_cfg = {}, battery_cfg = {};
 	struct device *dev = &client->dev;
 	struct smb347_charger *smb;
 	int ret;
@@ -1222,49 +1245,35 @@
 	if (ret < 0)
 		return ret;
 
+	mains_usb_cfg.supplied_to = battery;
+	mains_usb_cfg.num_supplicants = ARRAY_SIZE(battery);
+	mains_usb_cfg.drv_data = smb;
 	if (smb->pdata->use_mains) {
-		smb->mains.name = "smb347-mains";
-		smb->mains.type = POWER_SUPPLY_TYPE_MAINS;
-		smb->mains.get_property = smb347_mains_get_property;
-		smb->mains.properties = smb347_mains_properties;
-		smb->mains.num_properties = ARRAY_SIZE(smb347_mains_properties);
-		smb->mains.supplied_to = battery;
-		smb->mains.num_supplicants = ARRAY_SIZE(battery);
-		ret = power_supply_register(dev, &smb->mains);
-		if (ret < 0)
-			return ret;
+		smb->mains = power_supply_register(dev, &smb347_mains_desc,
+						   &mains_usb_cfg);
+		if (IS_ERR(smb->mains))
+			return PTR_ERR(smb->mains);
 	}
 
 	if (smb->pdata->use_usb) {
-		smb->usb.name = "smb347-usb";
-		smb->usb.type = POWER_SUPPLY_TYPE_USB;
-		smb->usb.get_property = smb347_usb_get_property;
-		smb->usb.properties = smb347_usb_properties;
-		smb->usb.num_properties = ARRAY_SIZE(smb347_usb_properties);
-		smb->usb.supplied_to = battery;
-		smb->usb.num_supplicants = ARRAY_SIZE(battery);
-		ret = power_supply_register(dev, &smb->usb);
-		if (ret < 0) {
+		smb->usb = power_supply_register(dev, &smb347_usb_desc,
+						 &mains_usb_cfg);
+		if (IS_ERR(smb->usb)) {
 			if (smb->pdata->use_mains)
-				power_supply_unregister(&smb->mains);
-			return ret;
+				power_supply_unregister(smb->mains);
+			return PTR_ERR(smb->usb);
 		}
 	}
 
-	smb->battery.name = "smb347-battery";
-	smb->battery.type = POWER_SUPPLY_TYPE_BATTERY;
-	smb->battery.get_property = smb347_battery_get_property;
-	smb->battery.properties = smb347_battery_properties;
-	smb->battery.num_properties = ARRAY_SIZE(smb347_battery_properties);
-
-
-	ret = power_supply_register(dev, &smb->battery);
-	if (ret < 0) {
+	battery_cfg.drv_data = smb;
+	smb->battery = power_supply_register(dev, &smb347_battery_desc,
+					     &battery_cfg);
+	if (IS_ERR(smb->battery)) {
 		if (smb->pdata->use_usb)
-			power_supply_unregister(&smb->usb);
+			power_supply_unregister(smb->usb);
 		if (smb->pdata->use_mains)
-			power_supply_unregister(&smb->mains);
-		return ret;
+			power_supply_unregister(smb->mains);
+		return PTR_ERR(smb->battery);
 	}
 
 	/*
@@ -1294,11 +1303,11 @@
 		gpio_free(smb->pdata->irq_gpio);
 	}
 
-	power_supply_unregister(&smb->battery);
+	power_supply_unregister(smb->battery);
 	if (smb->pdata->use_usb)
-		power_supply_unregister(&smb->usb);
+		power_supply_unregister(smb->usb);
 	if (smb->pdata->use_mains)
-		power_supply_unregister(&smb->mains);
+		power_supply_unregister(smb->mains);
 	return 0;
 }
 
diff --git a/drivers/power/test_power.c b/drivers/power/test_power.c
index f26b1fa..f986e0c 100644
--- a/drivers/power/test_power.c
+++ b/drivers/power/test_power.c
@@ -153,12 +153,12 @@
 	"test_battery",
 };
 
-static struct power_supply test_power_supplies[] = {
+static struct power_supply *test_power_supplies[TEST_POWER_NUM];
+
+static const struct power_supply_desc test_power_desc[] = {
 	[TEST_AC] = {
 		.name = "test_ac",
 		.type = POWER_SUPPLY_TYPE_MAINS,
-		.supplied_to = test_power_ac_supplied_to,
-		.num_supplicants = ARRAY_SIZE(test_power_ac_supplied_to),
 		.properties = test_power_ac_props,
 		.num_properties = ARRAY_SIZE(test_power_ac_props),
 		.get_property = test_power_get_ac_property,
@@ -173,14 +173,25 @@
 	[TEST_USB] = {
 		.name = "test_usb",
 		.type = POWER_SUPPLY_TYPE_USB,
-		.supplied_to = test_power_ac_supplied_to,
-		.num_supplicants = ARRAY_SIZE(test_power_ac_supplied_to),
 		.properties = test_power_ac_props,
 		.num_properties = ARRAY_SIZE(test_power_ac_props),
 		.get_property = test_power_get_usb_property,
 	},
 };
 
+static const struct power_supply_config test_power_configs[] = {
+	{
+		/* test_ac */
+		.supplied_to = test_power_ac_supplied_to,
+		.num_supplicants = ARRAY_SIZE(test_power_ac_supplied_to),
+	}, {
+		/* test_battery */
+	}, {
+		/* test_usb */
+		.supplied_to = test_power_ac_supplied_to,
+		.num_supplicants = ARRAY_SIZE(test_power_ac_supplied_to),
+	},
+};
 
 static int __init test_power_init(void)
 {
@@ -188,12 +199,16 @@
 	int ret;
 
 	BUILD_BUG_ON(TEST_POWER_NUM != ARRAY_SIZE(test_power_supplies));
+	BUILD_BUG_ON(TEST_POWER_NUM != ARRAY_SIZE(test_power_configs));
 
 	for (i = 0; i < ARRAY_SIZE(test_power_supplies); i++) {
-		ret = power_supply_register(NULL, &test_power_supplies[i]);
-		if (ret) {
+		test_power_supplies[i] = power_supply_register(NULL,
+						&test_power_desc[i],
+						&test_power_configs[i]);
+		if (IS_ERR(test_power_supplies[i])) {
 			pr_err("%s: failed to register %s\n", __func__,
-				test_power_supplies[i].name);
+				test_power_desc[i].name);
+			ret = PTR_ERR(test_power_supplies[i]);
 			goto failed;
 		}
 	}
@@ -202,7 +217,7 @@
 	return 0;
 failed:
 	while (--i >= 0)
-		power_supply_unregister(&test_power_supplies[i]);
+		power_supply_unregister(test_power_supplies[i]);
 	return ret;
 }
 module_init(test_power_init);
@@ -216,13 +231,13 @@
 	usb_online = 0;
 	battery_status = POWER_SUPPLY_STATUS_DISCHARGING;
 	for (i = 0; i < ARRAY_SIZE(test_power_supplies); i++)
-		power_supply_changed(&test_power_supplies[i]);
+		power_supply_changed(test_power_supplies[i]);
 	pr_info("%s: 'changed' event sent, sleeping for 10 seconds...\n",
 		__func__);
 	ssleep(10);
 
 	for (i = 0; i < ARRAY_SIZE(test_power_supplies); i++)
-		power_supply_unregister(&test_power_supplies[i]);
+		power_supply_unregister(test_power_supplies[i]);
 
 	module_initialized = false;
 }
@@ -320,7 +335,7 @@
 static int param_set_ac_online(const char *key, const struct kernel_param *kp)
 {
 	ac_online = map_get_value(map_ac_online, key, ac_online);
-	signal_power_supply_changed(&test_power_supplies[TEST_AC]);
+	signal_power_supply_changed(test_power_supplies[TEST_AC]);
 	return 0;
 }
 
@@ -333,7 +348,7 @@
 static int param_set_usb_online(const char *key, const struct kernel_param *kp)
 {
 	usb_online = map_get_value(map_ac_online, key, usb_online);
-	signal_power_supply_changed(&test_power_supplies[TEST_USB]);
+	signal_power_supply_changed(test_power_supplies[TEST_USB]);
 	return 0;
 }
 
@@ -347,7 +362,7 @@
 					const struct kernel_param *kp)
 {
 	battery_status = map_get_value(map_status, key, battery_status);
-	signal_power_supply_changed(&test_power_supplies[TEST_BATTERY]);
+	signal_power_supply_changed(test_power_supplies[TEST_BATTERY]);
 	return 0;
 }
 
@@ -361,7 +376,7 @@
 					const struct kernel_param *kp)
 {
 	battery_health = map_get_value(map_health, key, battery_health);
-	signal_power_supply_changed(&test_power_supplies[TEST_BATTERY]);
+	signal_power_supply_changed(test_power_supplies[TEST_BATTERY]);
 	return 0;
 }
 
@@ -375,7 +390,7 @@
 					const struct kernel_param *kp)
 {
 	battery_present = map_get_value(map_present, key, battery_present);
-	signal_power_supply_changed(&test_power_supplies[TEST_AC]);
+	signal_power_supply_changed(test_power_supplies[TEST_AC]);
 	return 0;
 }
 
@@ -391,7 +406,7 @@
 {
 	battery_technology = map_get_value(map_technology, key,
 						battery_technology);
-	signal_power_supply_changed(&test_power_supplies[TEST_BATTERY]);
+	signal_power_supply_changed(test_power_supplies[TEST_BATTERY]);
 	return 0;
 }
 
@@ -412,7 +427,7 @@
 		return -EINVAL;
 
 	battery_capacity = tmp;
-	signal_power_supply_changed(&test_power_supplies[TEST_BATTERY]);
+	signal_power_supply_changed(test_power_supplies[TEST_BATTERY]);
 	return 0;
 }
 
@@ -427,7 +442,7 @@
 		return -EINVAL;
 
 	battery_voltage = tmp;
-	signal_power_supply_changed(&test_power_supplies[TEST_BATTERY]);
+	signal_power_supply_changed(test_power_supplies[TEST_BATTERY]);
 	return 0;
 }
 
diff --git a/drivers/power/tosa_battery.c b/drivers/power/tosa_battery.c
index f4d80df..6e88c1b 100644
--- a/drivers/power/tosa_battery.c
+++ b/drivers/power/tosa_battery.c
@@ -26,7 +26,7 @@
 
 struct tosa_bat {
 	int status;
-	struct power_supply psy;
+	struct power_supply *psy;
 	int full_chrg;
 
 	struct mutex work_lock; /* protects data */
@@ -61,7 +61,7 @@
 	mutex_lock(&bat_lock);
 	gpio_set_value(bat->gpio_bat, 1);
 	msleep(5);
-	value = wm97xx_read_aux_adc(dev_get_drvdata(bat->psy.dev->parent),
+	value = wm97xx_read_aux_adc(dev_get_drvdata(bat->psy->dev.parent),
 			bat->adc_bat);
 	gpio_set_value(bat->gpio_bat, 0);
 	mutex_unlock(&bat_lock);
@@ -81,7 +81,7 @@
 	mutex_lock(&bat_lock);
 	gpio_set_value(bat->gpio_temp, 1);
 	msleep(5);
-	value = wm97xx_read_aux_adc(dev_get_drvdata(bat->psy.dev->parent),
+	value = wm97xx_read_aux_adc(dev_get_drvdata(bat->psy->dev.parent),
 			bat->adc_temp);
 	gpio_set_value(bat->gpio_temp, 0);
 	mutex_unlock(&bat_lock);
@@ -96,7 +96,7 @@
 			    union power_supply_propval *val)
 {
 	int ret = 0;
-	struct tosa_bat *bat = container_of(psy, struct tosa_bat, psy);
+	struct tosa_bat *bat = power_supply_get_drvdata(psy);
 
 	if (bat->is_present && !bat->is_present(bat)
 			&& psp != POWER_SUPPLY_PROP_PRESENT) {
@@ -158,14 +158,14 @@
 static void tosa_bat_update(struct tosa_bat *bat)
 {
 	int old;
-	struct power_supply *psy = &bat->psy;
+	struct power_supply *psy = bat->psy;
 
 	mutex_lock(&bat->work_lock);
 
 	old = bat->status;
 
 	if (bat->is_present && !bat->is_present(bat)) {
-		printk(KERN_NOTICE "%s not present\n", psy->name);
+		printk(KERN_NOTICE "%s not present\n", psy->desc->name);
 		bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
 		bat->full_chrg = -1;
 	} else if (power_supply_am_i_supplied(psy)) {
@@ -222,18 +222,38 @@
 	POWER_SUPPLY_PROP_PRESENT,
 };
 
+static const struct power_supply_desc tosa_bat_main_desc = {
+	.name		= "main-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= tosa_bat_main_props,
+	.num_properties	= ARRAY_SIZE(tosa_bat_main_props),
+	.get_property	= tosa_bat_get_property,
+	.external_power_changed = tosa_bat_external_power_changed,
+	.use_for_apm	= 1,
+};
+
+static const struct power_supply_desc tosa_bat_jacket_desc = {
+	.name		= "jacket-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= tosa_bat_main_props,
+	.num_properties	= ARRAY_SIZE(tosa_bat_main_props),
+	.get_property	= tosa_bat_get_property,
+	.external_power_changed = tosa_bat_external_power_changed,
+};
+
+static const struct power_supply_desc tosa_bat_bu_desc = {
+	.name		= "backup-battery",
+	.type		= POWER_SUPPLY_TYPE_BATTERY,
+	.properties	= tosa_bat_bu_props,
+	.num_properties	= ARRAY_SIZE(tosa_bat_bu_props),
+	.get_property	= tosa_bat_get_property,
+	.external_power_changed = tosa_bat_external_power_changed,
+};
+
 static struct tosa_bat tosa_bat_main = {
 	.status = POWER_SUPPLY_STATUS_DISCHARGING,
 	.full_chrg = -1,
-	.psy = {
-		.name		= "main-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= tosa_bat_main_props,
-		.num_properties	= ARRAY_SIZE(tosa_bat_main_props),
-		.get_property	= tosa_bat_get_property,
-		.external_power_changed = tosa_bat_external_power_changed,
-		.use_for_apm	= 1,
-	},
+	.psy = NULL,
 
 	.gpio_full = TOSA_GPIO_BAT0_CRG,
 	.gpio_charge_off = TOSA_GPIO_CHARGE_OFF,
@@ -254,14 +274,7 @@
 static struct tosa_bat tosa_bat_jacket = {
 	.status = POWER_SUPPLY_STATUS_DISCHARGING,
 	.full_chrg = -1,
-	.psy = {
-		.name		= "jacket-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= tosa_bat_main_props,
-		.num_properties	= ARRAY_SIZE(tosa_bat_main_props),
-		.get_property	= tosa_bat_get_property,
-		.external_power_changed = tosa_bat_external_power_changed,
-	},
+	.psy = NULL,
 
 	.is_present = tosa_jacket_bat_is_present,
 	.gpio_full = TOSA_GPIO_BAT1_CRG,
@@ -283,15 +296,7 @@
 static struct tosa_bat tosa_bat_bu = {
 	.status = POWER_SUPPLY_STATUS_UNKNOWN,
 	.full_chrg = -1,
-
-	.psy = {
-		.name		= "backup-battery",
-		.type		= POWER_SUPPLY_TYPE_BATTERY,
-		.properties	= tosa_bat_bu_props,
-		.num_properties	= ARRAY_SIZE(tosa_bat_bu_props),
-		.get_property	= tosa_bat_get_property,
-		.external_power_changed = tosa_bat_external_power_changed,
-	},
+	.psy = NULL,
 
 	.gpio_full = -1,
 	.gpio_charge_off = -1,
@@ -345,6 +350,9 @@
 static int tosa_bat_probe(struct platform_device *dev)
 {
 	int ret;
+	struct power_supply_config main_psy_cfg = {},
+				   jacket_psy_cfg = {},
+				   bu_psy_cfg = {};
 
 	if (!machine_is_tosa())
 		return -ENODEV;
@@ -358,15 +366,31 @@
 
 	INIT_WORK(&bat_work, tosa_bat_work);
 
-	ret = power_supply_register(&dev->dev, &tosa_bat_main.psy);
-	if (ret)
+	main_psy_cfg.drv_data = &tosa_bat_main;
+	tosa_bat_main.psy = power_supply_register(&dev->dev,
+						  &tosa_bat_main_desc,
+						  &main_psy_cfg);
+	if (IS_ERR(tosa_bat_main.psy)) {
+		ret = PTR_ERR(tosa_bat_main.psy);
 		goto err_psy_reg_main;
-	ret = power_supply_register(&dev->dev, &tosa_bat_jacket.psy);
-	if (ret)
+	}
+
+	jacket_psy_cfg.drv_data = &tosa_bat_jacket;
+	tosa_bat_jacket.psy = power_supply_register(&dev->dev,
+						    &tosa_bat_jacket_desc,
+						    &jacket_psy_cfg);
+	if (IS_ERR(tosa_bat_jacket.psy)) {
+		ret = PTR_ERR(tosa_bat_jacket.psy);
 		goto err_psy_reg_jacket;
-	ret = power_supply_register(&dev->dev, &tosa_bat_bu.psy);
-	if (ret)
+	}
+
+	bu_psy_cfg.drv_data = &tosa_bat_bu;
+	tosa_bat_bu.psy = power_supply_register(&dev->dev, &tosa_bat_bu_desc,
+						&bu_psy_cfg);
+	if (IS_ERR(tosa_bat_bu.psy)) {
+		ret = PTR_ERR(tosa_bat_bu.psy);
 		goto err_psy_reg_bu;
+	}
 
 	ret = request_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG),
 				tosa_bat_gpio_isr,
@@ -395,11 +419,11 @@
 err_req_jacket:
 	free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
 err_req_main:
-	power_supply_unregister(&tosa_bat_bu.psy);
+	power_supply_unregister(tosa_bat_bu.psy);
 err_psy_reg_bu:
-	power_supply_unregister(&tosa_bat_jacket.psy);
+	power_supply_unregister(tosa_bat_jacket.psy);
 err_psy_reg_jacket:
-	power_supply_unregister(&tosa_bat_main.psy);
+	power_supply_unregister(tosa_bat_main.psy);
 err_psy_reg_main:
 
 	/* see comment in tosa_bat_remove */
@@ -415,9 +439,9 @@
 	free_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG), &tosa_bat_jacket);
 	free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
 
-	power_supply_unregister(&tosa_bat_bu.psy);
-	power_supply_unregister(&tosa_bat_jacket.psy);
-	power_supply_unregister(&tosa_bat_main.psy);
+	power_supply_unregister(tosa_bat_bu.psy);
+	power_supply_unregister(tosa_bat_jacket.psy);
+	power_supply_unregister(tosa_bat_main.psy);
 
 	/*
 	 * Now cancel the bat_work.  We won't get any more schedules,
diff --git a/drivers/power/tps65090-charger.c b/drivers/power/tps65090-charger.c
index 0f4e597..7e8fbd2 100644
--- a/drivers/power/tps65090-charger.c
+++ b/drivers/power/tps65090-charger.c
@@ -43,7 +43,7 @@
 	int	irq;
 	struct task_struct	*poll_task;
 	bool			passive_mode;
-	struct power_supply	ac;
+	struct power_supply	*ac;
 	struct tps65090_platform_data *pdata;
 };
 
@@ -135,8 +135,7 @@
 			enum power_supply_property psp,
 			union power_supply_propval *val)
 {
-	struct tps65090_charger *charger = container_of(psy,
-					struct tps65090_charger, ac);
+	struct tps65090_charger *charger = power_supply_get_drvdata(psy);
 
 	if (psp == POWER_SUPPLY_PROP_ONLINE) {
 		val->intval = charger->ac_online;
@@ -190,7 +189,7 @@
 	}
 
 	if (charger->prev_ac_online != charger->ac_online)
-		power_supply_changed(&charger->ac);
+		power_supply_changed(charger->ac);
 
 	return IRQ_HANDLED;
 }
@@ -229,10 +228,19 @@
 	return 0;
 }
 
+static const struct power_supply_desc tps65090_charger_desc = {
+	.name			= "tps65090-ac",
+	.type			= POWER_SUPPLY_TYPE_MAINS,
+	.get_property		= tps65090_ac_get_property,
+	.properties		= tps65090_ac_props,
+	.num_properties		= ARRAY_SIZE(tps65090_ac_props),
+};
+
 static int tps65090_charger_probe(struct platform_device *pdev)
 {
 	struct tps65090_charger *cdata;
 	struct tps65090_platform_data *pdata;
+	struct power_supply_config psy_cfg = {};
 	uint8_t status1 = 0;
 	int ret;
 	int irq;
@@ -259,19 +267,16 @@
 	cdata->dev			= &pdev->dev;
 	cdata->pdata			= pdata;
 
-	cdata->ac.name			= "tps65090-ac";
-	cdata->ac.type			= POWER_SUPPLY_TYPE_MAINS;
-	cdata->ac.get_property		= tps65090_ac_get_property;
-	cdata->ac.properties		= tps65090_ac_props;
-	cdata->ac.num_properties	= ARRAY_SIZE(tps65090_ac_props);
-	cdata->ac.supplied_to		= pdata->supplied_to;
-	cdata->ac.num_supplicants	= pdata->num_supplicants;
-	cdata->ac.of_node		= pdev->dev.of_node;
+	psy_cfg.supplied_to		= pdata->supplied_to;
+	psy_cfg.num_supplicants		= pdata->num_supplicants;
+	psy_cfg.of_node			= pdev->dev.of_node;
+	psy_cfg.drv_data		= cdata;
 
-	ret = power_supply_register(&pdev->dev, &cdata->ac);
-	if (ret) {
+	cdata->ac = power_supply_register(&pdev->dev, &tps65090_charger_desc,
+			&psy_cfg);
+	if (IS_ERR(cdata->ac)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
-		return ret;
+		return PTR_ERR(cdata->ac);
 	}
 
 	irq = platform_get_irq(pdev, 0);
@@ -301,7 +306,7 @@
 			goto fail_unregister_supply;
 		}
 		cdata->ac_online = 1;
-		power_supply_changed(&cdata->ac);
+		power_supply_changed(cdata->ac);
 	}
 
 	if (irq != -ENXIO) {
@@ -328,7 +333,7 @@
 	return 0;
 
 fail_unregister_supply:
-	power_supply_unregister(&cdata->ac);
+	power_supply_unregister(cdata->ac);
 
 	return ret;
 }
@@ -339,12 +344,12 @@
 
 	if (cdata->irq == -ENXIO)
 		kthread_stop(cdata->poll_task);
-	power_supply_unregister(&cdata->ac);
+	power_supply_unregister(cdata->ac);
 
 	return 0;
 }
 
-static struct of_device_id of_tps65090_charger_match[] = {
+static const struct of_device_id of_tps65090_charger_match[] = {
 	{ .compatible = "ti,tps65090-charger", },
 	{ /* end */ }
 };
diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c
index d35b83e..02a522c 100644
--- a/drivers/power/twl4030_charger.c
+++ b/drivers/power/twl4030_charger.c
@@ -87,8 +87,8 @@
 
 struct twl4030_bci {
 	struct device		*dev;
-	struct power_supply	ac;
-	struct power_supply	usb;
+	struct power_supply	*ac;
+	struct power_supply	*usb;
 	struct usb_phy		*transceiver;
 	struct notifier_block	usb_nb;
 	struct work_struct	work;
@@ -318,8 +318,8 @@
 	struct twl4030_bci *bci = arg;
 
 	dev_dbg(bci->dev, "CHG_PRES irq\n");
-	power_supply_changed(&bci->ac);
-	power_supply_changed(&bci->usb);
+	power_supply_changed(bci->ac);
+	power_supply_changed(bci->usb);
 
 	return IRQ_HANDLED;
 }
@@ -347,8 +347,8 @@
 
 	if (irqs1 & (TWL4030_ICHGLOW | TWL4030_ICHGEOC)) {
 		/* charger state change, inform the core */
-		power_supply_changed(&bci->ac);
-		power_supply_changed(&bci->usb);
+		power_supply_changed(bci->ac);
+		power_supply_changed(bci->usb);
 	}
 
 	/* various monitoring events, for now we just log them here */
@@ -463,7 +463,7 @@
 				    enum power_supply_property psp,
 				    union power_supply_propval *val)
 {
-	struct twl4030_bci *bci = dev_get_drvdata(psy->dev->parent);
+	struct twl4030_bci *bci = dev_get_drvdata(psy->dev.parent);
 	int is_charging;
 	int state;
 	int ret;
@@ -472,7 +472,7 @@
 	if (state < 0)
 		return state;
 
-	if (psy->type == POWER_SUPPLY_TYPE_USB)
+	if (psy->desc->type == POWER_SUPPLY_TYPE_USB)
 		is_charging = state & TWL4030_MSTATEC_USB;
 	else
 		is_charging = state & TWL4030_MSTATEC_AC;
@@ -488,7 +488,7 @@
 		/* charging must be active for meaningful result */
 		if (!is_charging)
 			return -ENODATA;
-		if (psy->type == POWER_SUPPLY_TYPE_USB) {
+		if (psy->desc->type == POWER_SUPPLY_TYPE_USB) {
 			ret = twl4030bci_read_adc_val(TWL4030_BCIVBUS);
 			if (ret < 0)
 				return ret;
@@ -558,6 +558,22 @@
 }
 #endif
 
+static const struct power_supply_desc twl4030_bci_ac_desc = {
+	.name		= "twl4030_ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= twl4030_charger_props,
+	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
+	.get_property	= twl4030_bci_get_property,
+};
+
+static const struct power_supply_desc twl4030_bci_usb_desc = {
+	.name		= "twl4030_usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= twl4030_charger_props,
+	.num_properties	= ARRAY_SIZE(twl4030_charger_props),
+	.get_property	= twl4030_bci_get_property,
+};
+
 static int __init twl4030_bci_probe(struct platform_device *pdev)
 {
 	struct twl4030_bci *bci;
@@ -584,28 +600,21 @@
 	}
 
 	platform_set_drvdata(pdev, bci);
-	bci->ac.name = "twl4030_ac";
-	bci->ac.type = POWER_SUPPLY_TYPE_MAINS;
-	bci->ac.properties = twl4030_charger_props;
-	bci->ac.num_properties = ARRAY_SIZE(twl4030_charger_props);
-	bci->ac.get_property = twl4030_bci_get_property;
 
-	ret = power_supply_register(&pdev->dev, &bci->ac);
-	if (ret) {
+	bci->ac = power_supply_register(&pdev->dev, &twl4030_bci_ac_desc,
+					NULL);
+	if (IS_ERR(bci->ac)) {
+		ret = PTR_ERR(bci->ac);
 		dev_err(&pdev->dev, "failed to register ac: %d\n", ret);
 		goto fail_register_ac;
 	}
 
-	bci->usb.name = "twl4030_usb";
-	bci->usb.type = POWER_SUPPLY_TYPE_USB;
-	bci->usb.properties = twl4030_charger_props;
-	bci->usb.num_properties = ARRAY_SIZE(twl4030_charger_props);
-	bci->usb.get_property = twl4030_bci_get_property;
-
 	bci->usb_reg = regulator_get(bci->dev, "bci3v1");
 
-	ret = power_supply_register(&pdev->dev, &bci->usb);
-	if (ret) {
+	bci->usb = power_supply_register(&pdev->dev, &twl4030_bci_usb_desc,
+					 NULL);
+	if (IS_ERR(bci->usb)) {
+		ret = PTR_ERR(bci->usb);
 		dev_err(&pdev->dev, "failed to register usb: %d\n", ret);
 		goto fail_register_usb;
 	}
@@ -670,9 +679,9 @@
 fail_bci_irq:
 	free_irq(bci->irq_chg, bci);
 fail_chg_irq:
-	power_supply_unregister(&bci->usb);
+	power_supply_unregister(bci->usb);
 fail_register_usb:
-	power_supply_unregister(&bci->ac);
+	power_supply_unregister(bci->ac);
 fail_register_ac:
 fail_no_battery:
 	kfree(bci);
@@ -700,8 +709,8 @@
 	}
 	free_irq(bci->irq_bci, bci);
 	free_irq(bci->irq_chg, bci);
-	power_supply_unregister(&bci->usb);
-	power_supply_unregister(&bci->ac);
+	power_supply_unregister(bci->usb);
+	power_supply_unregister(bci->ac);
 	kfree(bci);
 
 	return 0;
diff --git a/drivers/power/twl4030_madc_battery.c b/drivers/power/twl4030_madc_battery.c
index 7ef445a..f5817e4 100644
--- a/drivers/power/twl4030_madc_battery.c
+++ b/drivers/power/twl4030_madc_battery.c
@@ -19,10 +19,14 @@
 #include <linux/sort.h>
 #include <linux/i2c/twl4030-madc.h>
 #include <linux/power/twl4030_madc_battery.h>
+#include <linux/iio/consumer.h>
 
 struct twl4030_madc_battery {
-	struct power_supply psy;
+	struct power_supply *psy;
 	struct twl4030_madc_bat_platform_data *pdata;
+	struct iio_channel *channel_temp;
+	struct iio_channel *channel_ichg;
+	struct iio_channel *channel_vbat;
 };
 
 static enum power_supply_property twl4030_madc_bat_props[] = {
@@ -38,43 +42,34 @@
 	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
 };
 
-static int madc_read(int index)
+static int madc_read(struct iio_channel *channel)
 {
-	struct twl4030_madc_request req;
-	int val;
+	int val, err;
+	err = iio_read_channel_processed(channel, &val);
+	if (err < 0)
+		return err;
 
-	req.channels = index;
-	req.method = TWL4030_MADC_SW2;
-	req.type = TWL4030_MADC_WAIT;
-	req.do_avg = 0;
-	req.raw = false;
-	req.func_cb = NULL;
-
-	val = twl4030_madc_conversion(&req);
-	if (val < 0)
-		return val;
-
-	return req.rbuf[ffs(index) - 1];
+	return val;
 }
 
-static int twl4030_madc_bat_get_charging_status(void)
+static int twl4030_madc_bat_get_charging_status(struct twl4030_madc_battery *bt)
 {
-	return (madc_read(TWL4030_MADC_ICHG) > 0) ? 1 : 0;
+	return (madc_read(bt->channel_ichg) > 0) ? 1 : 0;
 }
 
-static int twl4030_madc_bat_get_voltage(void)
+static int twl4030_madc_bat_get_voltage(struct twl4030_madc_battery *bt)
 {
-	return madc_read(TWL4030_MADC_VBAT);
+	return madc_read(bt->channel_vbat);
 }
 
-static int twl4030_madc_bat_get_current(void)
+static int twl4030_madc_bat_get_current(struct twl4030_madc_battery *bt)
 {
-	return madc_read(TWL4030_MADC_ICHG) * 1000;
+	return madc_read(bt->channel_ichg) * 1000;
 }
 
-static int twl4030_madc_bat_get_temp(void)
+static int twl4030_madc_bat_get_temp(struct twl4030_madc_battery *bt)
 {
-	return madc_read(TWL4030_MADC_BTEMP) * 10;
+	return madc_read(bt->channel_temp) * 10;
 }
 
 static int twl4030_madc_bat_voltscale(struct twl4030_madc_battery *bat,
@@ -84,7 +79,7 @@
 	int i, res = 0;
 
 	/* choose charging curve */
-	if (twl4030_madc_bat_get_charging_status())
+	if (twl4030_madc_bat_get_charging_status(bat))
 		calibration = bat->pdata->charging;
 	else
 		calibration = bat->pdata->discharging;
@@ -113,29 +108,28 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
-	struct twl4030_madc_battery *bat = container_of(psy,
-					struct twl4030_madc_battery, psy);
+	struct twl4030_madc_battery *bat = power_supply_get_drvdata(psy);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
 		if (twl4030_madc_bat_voltscale(bat,
-				twl4030_madc_bat_get_voltage()) > 95)
+				twl4030_madc_bat_get_voltage(bat)) > 95)
 			val->intval = POWER_SUPPLY_STATUS_FULL;
 		else {
-			if (twl4030_madc_bat_get_charging_status())
+			if (twl4030_madc_bat_get_charging_status(bat))
 				val->intval = POWER_SUPPLY_STATUS_CHARGING;
 			else
 				val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 		}
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-		val->intval = twl4030_madc_bat_get_voltage() * 1000;
+		val->intval = twl4030_madc_bat_get_voltage(bat) * 1000;
 		break;
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
 		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
 		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
-		val->intval = twl4030_madc_bat_get_current();
+		val->intval = twl4030_madc_bat_get_current(bat);
 		break;
 	case POWER_SUPPLY_PROP_PRESENT:
 		/* assume battery is always present */
@@ -143,23 +137,23 @@
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_NOW: {
 			int percent = twl4030_madc_bat_voltscale(bat,
-					twl4030_madc_bat_get_voltage());
+					twl4030_madc_bat_get_voltage(bat));
 			val->intval = (percent * bat->pdata->capacity) / 100;
 			break;
 		}
 	case POWER_SUPPLY_PROP_CAPACITY:
 		val->intval = twl4030_madc_bat_voltscale(bat,
-					twl4030_madc_bat_get_voltage());
+					twl4030_madc_bat_get_voltage(bat));
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 		val->intval = bat->pdata->capacity;
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
-		val->intval = twl4030_madc_bat_get_temp();
+		val->intval = twl4030_madc_bat_get_temp(bat);
 		break;
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: {
 			int percent = twl4030_madc_bat_voltscale(bat,
-					twl4030_madc_bat_get_voltage());
+					twl4030_madc_bat_get_voltage(bat));
 			/* in mAh */
 			int chg = (percent * (bat->pdata->capacity/1000))/100;
 
@@ -176,12 +170,19 @@
 
 static void twl4030_madc_bat_ext_changed(struct power_supply *psy)
 {
-	struct twl4030_madc_battery *bat = container_of(psy,
-					struct twl4030_madc_battery, psy);
-
-	power_supply_changed(&bat->psy);
+	power_supply_changed(psy);
 }
 
+static const struct power_supply_desc twl4030_madc_bat_desc = {
+	.name			= "twl4030_battery",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= twl4030_madc_bat_props,
+	.num_properties		= ARRAY_SIZE(twl4030_madc_bat_props),
+	.get_property		= twl4030_madc_bat_get_property,
+	.external_power_changed	= twl4030_madc_bat_ext_changed,
+
+};
+
 static int twl4030_cmp(const void *a, const void *b)
 {
 	return ((struct twl4030_madc_bat_calibration *)b)->voltage -
@@ -192,19 +193,31 @@
 {
 	struct twl4030_madc_battery *twl4030_madc_bat;
 	struct twl4030_madc_bat_platform_data *pdata = pdev->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
+	int ret = 0;
 
-	twl4030_madc_bat = kzalloc(sizeof(*twl4030_madc_bat), GFP_KERNEL);
+	twl4030_madc_bat = devm_kzalloc(&pdev->dev, sizeof(*twl4030_madc_bat),
+				GFP_KERNEL);
 	if (!twl4030_madc_bat)
 		return -ENOMEM;
 
-	twl4030_madc_bat->psy.name = "twl4030_battery";
-	twl4030_madc_bat->psy.type = POWER_SUPPLY_TYPE_BATTERY;
-	twl4030_madc_bat->psy.properties = twl4030_madc_bat_props;
-	twl4030_madc_bat->psy.num_properties =
-					ARRAY_SIZE(twl4030_madc_bat_props);
-	twl4030_madc_bat->psy.get_property = twl4030_madc_bat_get_property;
-	twl4030_madc_bat->psy.external_power_changed =
-					twl4030_madc_bat_ext_changed;
+	twl4030_madc_bat->channel_temp = iio_channel_get(&pdev->dev, "temp");
+	if (IS_ERR(twl4030_madc_bat->channel_temp)) {
+		ret = PTR_ERR(twl4030_madc_bat->channel_temp);
+		goto err;
+	}
+
+	twl4030_madc_bat->channel_ichg = iio_channel_get(&pdev->dev, "ichg");
+	if (IS_ERR(twl4030_madc_bat->channel_ichg)) {
+		ret = PTR_ERR(twl4030_madc_bat->channel_ichg);
+		goto err_temp;
+	}
+
+	twl4030_madc_bat->channel_vbat = iio_channel_get(&pdev->dev, "vbat");
+	if (IS_ERR(twl4030_madc_bat->channel_vbat)) {
+		ret = PTR_ERR(twl4030_madc_bat->channel_vbat);
+		goto err_ichg;
+	}
 
 	/* sort charging and discharging calibration data */
 	sort(pdata->charging, pdata->charging_size,
@@ -216,17 +229,36 @@
 
 	twl4030_madc_bat->pdata = pdata;
 	platform_set_drvdata(pdev, twl4030_madc_bat);
-	power_supply_register(&pdev->dev, &twl4030_madc_bat->psy);
+	psy_cfg.drv_data = twl4030_madc_bat;
+	twl4030_madc_bat->psy = power_supply_register(&pdev->dev,
+						      &twl4030_madc_bat_desc,
+						      &psy_cfg);
+	if (IS_ERR(twl4030_madc_bat->psy)) {
+		ret = PTR_ERR(twl4030_madc_bat->psy);
+		goto err_vbat;
+	}
 
 	return 0;
+
+err_vbat:
+	iio_channel_release(twl4030_madc_bat->channel_vbat);
+err_ichg:
+	iio_channel_release(twl4030_madc_bat->channel_ichg);
+err_temp:
+	iio_channel_release(twl4030_madc_bat->channel_temp);
+err:
+	return ret;
 }
 
 static int twl4030_madc_battery_remove(struct platform_device *pdev)
 {
 	struct twl4030_madc_battery *bat = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&bat->psy);
-	kfree(bat);
+	power_supply_unregister(bat->psy);
+
+	iio_channel_release(bat->channel_vbat);
+	iio_channel_release(bat->channel_ichg);
+	iio_channel_release(bat->channel_temp);
 
 	return 0;
 }
@@ -243,3 +275,4 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Lukas Märdian <lukas@goldelico.com>");
 MODULE_DESCRIPTION("twl4030_madc battery driver");
+MODULE_ALIAS("platform:twl4030_madc_battery");
diff --git a/drivers/power/wm831x_backup.c b/drivers/power/wm831x_backup.c
index 56fb509..2e33109 100644
--- a/drivers/power/wm831x_backup.c
+++ b/drivers/power/wm831x_backup.c
@@ -21,7 +21,8 @@
 
 struct wm831x_backup {
 	struct wm831x *wm831x;
-	struct power_supply backup;
+	struct power_supply *backup;
+	struct power_supply_desc backup_desc;
 	char name[20];
 };
 
@@ -115,7 +116,7 @@
 				  enum power_supply_property psp,
 				  union power_supply_propval *val)
 {
-	struct wm831x_backup *devdata = dev_get_drvdata(psy->dev->parent);
+	struct wm831x_backup *devdata = dev_get_drvdata(psy->dev.parent);
 	struct wm831x *wm831x = devdata->wm831x;
 	int ret = 0;
 
@@ -166,8 +167,6 @@
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *wm831x_pdata = wm831x->dev->platform_data;
 	struct wm831x_backup *devdata;
-	struct power_supply *backup;
-	int ret;
 
 	devdata = devm_kzalloc(&pdev->dev, sizeof(struct wm831x_backup),
 				GFP_KERNEL);
@@ -177,8 +176,6 @@
 	devdata->wm831x = wm831x;
 	platform_set_drvdata(pdev, devdata);
 
-	backup = &devdata->backup;
-
 	/* We ignore configuration failures since we can still read
 	 * back the status without enabling the charger (which may
 	 * already be enabled anyway).
@@ -192,21 +189,22 @@
 		snprintf(devdata->name, sizeof(devdata->name),
 			 "wm831x-backup");
 
-	backup->name = devdata->name;
-	backup->type = POWER_SUPPLY_TYPE_BATTERY;
-	backup->properties = wm831x_backup_props;
-	backup->num_properties = ARRAY_SIZE(wm831x_backup_props);
-	backup->get_property = wm831x_backup_get_prop;
-	ret = power_supply_register(&pdev->dev, backup);
+	devdata->backup_desc.name = devdata->name;
+	devdata->backup_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+	devdata->backup_desc.properties = wm831x_backup_props;
+	devdata->backup_desc.num_properties = ARRAY_SIZE(wm831x_backup_props);
+	devdata->backup_desc.get_property = wm831x_backup_get_prop;
+	devdata->backup = power_supply_register(&pdev->dev,
+						&devdata->backup_desc, NULL);
 
-	return ret;
+	return PTR_ERR_OR_ZERO(devdata->backup);
 }
 
 static int wm831x_backup_remove(struct platform_device *pdev)
 {
 	struct wm831x_backup *devdata = platform_get_drvdata(pdev);
 
-	power_supply_unregister(&devdata->backup);
+	power_supply_unregister(devdata->backup);
 
 	return 0;
 }
diff --git a/drivers/power/wm831x_power.c b/drivers/power/wm831x_power.c
index 3bed2f5..0161bda 100644
--- a/drivers/power/wm831x_power.c
+++ b/drivers/power/wm831x_power.c
@@ -21,9 +21,12 @@
 
 struct wm831x_power {
 	struct wm831x *wm831x;
-	struct power_supply wall;
-	struct power_supply usb;
-	struct power_supply battery;
+	struct power_supply *wall;
+	struct power_supply *usb;
+	struct power_supply *battery;
+	struct power_supply_desc wall_desc;
+	struct power_supply_desc usb_desc;
+	struct power_supply_desc battery_desc;
 	char wall_name[20];
 	char usb_name[20];
 	char battery_name[20];
@@ -67,7 +70,7 @@
 				enum power_supply_property psp,
 				union power_supply_propval *val)
 {
-	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev.parent);
 	struct wm831x *wm831x = wm831x_power->wm831x;
 	int ret = 0;
 
@@ -98,7 +101,7 @@
 			       enum power_supply_property psp,
 			       union power_supply_propval *val)
 {
-	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev.parent);
 	struct wm831x *wm831x = wm831x_power->wm831x;
 	int ret = 0;
 
@@ -393,7 +396,7 @@
 			       enum power_supply_property psp,
 			       union power_supply_propval *val)
 {
-	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev.parent);
 	struct wm831x *wm831x = wm831x_power->wm831x;
 	int ret = 0;
 
@@ -451,7 +454,7 @@
 	/* The battery charger is autonomous so we don't need to do
 	 * anything except kick user space */
 	if (wm831x_power->have_battery)
-		power_supply_changed(&wm831x_power->battery);
+		power_supply_changed(wm831x_power->battery);
 
 	return IRQ_HANDLED;
 }
@@ -482,9 +485,9 @@
 
 	/* Just notify for everything - little harm in overnotifying. */
 	if (wm831x_power->have_battery)
-		power_supply_changed(&wm831x_power->battery);
-	power_supply_changed(&wm831x_power->usb);
-	power_supply_changed(&wm831x_power->wall);
+		power_supply_changed(wm831x_power->battery);
+	power_supply_changed(wm831x_power->usb);
+	power_supply_changed(wm831x_power->wall);
 
 	return IRQ_HANDLED;
 }
@@ -494,9 +497,6 @@
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *wm831x_pdata = wm831x->dev->platform_data;
 	struct wm831x_power *power;
-	struct power_supply *usb;
-	struct power_supply *battery;
-	struct power_supply *wall;
 	int ret, irq, i;
 
 	power = kzalloc(sizeof(struct wm831x_power), GFP_KERNEL);
@@ -506,10 +506,6 @@
 	power->wm831x = wm831x;
 	platform_set_drvdata(pdev, power);
 
-	usb = &power->usb;
-	battery = &power->battery;
-	wall = &power->wall;
-
 	if (wm831x_pdata && wm831x_pdata->wm831x_num) {
 		snprintf(power->wall_name, sizeof(power->wall_name),
 			 "wm831x-wall.%d", wm831x_pdata->wm831x_num);
@@ -531,23 +527,28 @@
 	 */
 	wm831x_config_battery(wm831x);
 
-	wall->name = power->wall_name;
-	wall->type = POWER_SUPPLY_TYPE_MAINS;
-	wall->properties = wm831x_wall_props;
-	wall->num_properties = ARRAY_SIZE(wm831x_wall_props);
-	wall->get_property = wm831x_wall_get_prop;
-	ret = power_supply_register(&pdev->dev, wall);
-	if (ret)
+	power->wall_desc.name = power->wall_name;
+	power->wall_desc.type = POWER_SUPPLY_TYPE_MAINS;
+	power->wall_desc.properties = wm831x_wall_props;
+	power->wall_desc.num_properties = ARRAY_SIZE(wm831x_wall_props);
+	power->wall_desc.get_property = wm831x_wall_get_prop;
+	power->wall = power_supply_register(&pdev->dev, &power->wall_desc,
+					    NULL);
+	if (IS_ERR(power->wall)) {
+		ret = PTR_ERR(power->wall);
 		goto err_kmalloc;
+	}
 
-	usb->name = power->usb_name,
-	usb->type = POWER_SUPPLY_TYPE_USB;
-	usb->properties = wm831x_usb_props;
-	usb->num_properties = ARRAY_SIZE(wm831x_usb_props);
-	usb->get_property = wm831x_usb_get_prop;
-	ret = power_supply_register(&pdev->dev, usb);
-	if (ret)
+	power->usb_desc.name = power->usb_name,
+	power->usb_desc.type = POWER_SUPPLY_TYPE_USB;
+	power->usb_desc.properties = wm831x_usb_props;
+	power->usb_desc.num_properties = ARRAY_SIZE(wm831x_usb_props);
+	power->usb_desc.get_property = wm831x_usb_get_prop;
+	power->usb = power_supply_register(&pdev->dev, &power->usb_desc, NULL);
+	if (IS_ERR(power->usb)) {
+		ret = PTR_ERR(power->usb);
 		goto err_wall;
+	}
 
 	ret = wm831x_reg_read(wm831x, WM831X_CHARGER_CONTROL_1);
 	if (ret < 0)
@@ -555,14 +556,18 @@
 	power->have_battery = ret & WM831X_CHG_ENA;
 
 	if (power->have_battery) {
-		    battery->name = power->battery_name;
-		    battery->properties = wm831x_bat_props;
-		    battery->num_properties = ARRAY_SIZE(wm831x_bat_props);
-		    battery->get_property = wm831x_bat_get_prop;
-		    battery->use_for_apm = 1;
-		    ret = power_supply_register(&pdev->dev, battery);
-		    if (ret)
-			    goto err_usb;
+		power->battery_desc.name = power->battery_name;
+		power->battery_desc.properties = wm831x_bat_props;
+		power->battery_desc.num_properties = ARRAY_SIZE(wm831x_bat_props);
+		power->battery_desc.get_property = wm831x_bat_get_prop;
+		power->battery_desc.use_for_apm = 1;
+		power->battery = power_supply_register(&pdev->dev,
+						       &power->battery_desc,
+						       NULL);
+		if (IS_ERR(power->battery)) {
+			ret = PTR_ERR(power->battery);
+			goto err_usb;
+		}
 	}
 
 	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
@@ -615,11 +620,11 @@
 	free_irq(irq, power);
 err_battery:
 	if (power->have_battery)
-		power_supply_unregister(battery);
+		power_supply_unregister(power->battery);
 err_usb:
-	power_supply_unregister(usb);
+	power_supply_unregister(power->usb);
 err_wall:
-	power_supply_unregister(wall);
+	power_supply_unregister(power->wall);
 err_kmalloc:
 	kfree(power);
 	return ret;
@@ -645,9 +650,9 @@
 	free_irq(irq, wm831x_power);
 
 	if (wm831x_power->have_battery)
-		power_supply_unregister(&wm831x_power->battery);
-	power_supply_unregister(&wm831x_power->wall);
-	power_supply_unregister(&wm831x_power->usb);
+		power_supply_unregister(wm831x_power->battery);
+	power_supply_unregister(wm831x_power->wall);
+	power_supply_unregister(wm831x_power->usb);
 	kfree(wm831x_power);
 	return 0;
 }
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index b3607e2..5c58806 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -196,14 +196,14 @@
 		break;
 	case WM8350_IRQ_CHG_TO:
 		dev_err(wm8350->dev, "charger timeout\n");
-		power_supply_changed(&power->battery);
+		power_supply_changed(power->battery);
 		break;
 
 	case WM8350_IRQ_CHG_BAT_HOT:
 	case WM8350_IRQ_CHG_BAT_COLD:
 	case WM8350_IRQ_CHG_START:
 	case WM8350_IRQ_CHG_END:
-		power_supply_changed(&power->battery);
+		power_supply_changed(power->battery);
 		break;
 
 	case WM8350_IRQ_CHG_FAST_RDY:
@@ -231,9 +231,9 @@
 	case WM8350_IRQ_EXT_WALL_FB:
 		wm8350_charger_config(wm8350, policy);
 	case WM8350_IRQ_EXT_BAT_FB:   /* Fall through */
-		power_supply_changed(&power->battery);
-		power_supply_changed(&power->usb);
-		power_supply_changed(&power->ac);
+		power_supply_changed(power->battery);
+		power_supply_changed(power->usb);
+		power_supply_changed(power->ac);
 		break;
 
 	default:
@@ -250,7 +250,7 @@
 			      enum power_supply_property psp,
 			      union power_supply_propval *val)
 {
-	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -280,7 +280,7 @@
 			       enum power_supply_property psp,
 			       union power_supply_propval *val)
 {
-	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -346,7 +346,7 @@
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
 {
-	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev.parent);
 	int ret = 0;
 
 	switch (psp) {
@@ -382,6 +382,30 @@
 	POWER_SUPPLY_PROP_CHARGE_TYPE,
 };
 
+static const struct power_supply_desc wm8350_ac_desc = {
+	.name		= "wm8350-ac",
+	.type		= POWER_SUPPLY_TYPE_MAINS,
+	.properties	= wm8350_ac_props,
+	.num_properties	= ARRAY_SIZE(wm8350_ac_props),
+	.get_property	= wm8350_ac_get_prop,
+};
+
+static const struct power_supply_desc wm8350_battery_desc = {
+	.name		= "wm8350-battery",
+	.properties	= wm8350_bat_props,
+	.num_properties	= ARRAY_SIZE(wm8350_bat_props),
+	.get_property	= wm8350_bat_get_property,
+	.use_for_apm	= 1,
+};
+
+static const struct power_supply_desc wm8350_usb_desc = {
+	.name		= "wm8350-usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= wm8350_usb_props,
+	.num_properties	= ARRAY_SIZE(wm8350_usb_props),
+	.get_property	= wm8350_usb_get_prop,
+};
+
 /*********************************************************************
  *		Initialisation
  *********************************************************************/
@@ -447,37 +471,24 @@
 	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
 	struct wm8350_power *power = &wm8350->power;
 	struct wm8350_charger_policy *policy = power->policy;
-	struct power_supply *usb = &power->usb;
-	struct power_supply *battery = &power->battery;
-	struct power_supply *ac = &power->ac;
 	int ret;
 
-	ac->name = "wm8350-ac";
-	ac->type = POWER_SUPPLY_TYPE_MAINS;
-	ac->properties = wm8350_ac_props;
-	ac->num_properties = ARRAY_SIZE(wm8350_ac_props);
-	ac->get_property = wm8350_ac_get_prop;
-	ret = power_supply_register(&pdev->dev, ac);
-	if (ret)
-		return ret;
+	power->ac = power_supply_register(&pdev->dev, &wm8350_ac_desc, NULL);
+	if (IS_ERR(power->ac))
+		return PTR_ERR(power->ac);
 
-	battery->name = "wm8350-battery";
-	battery->properties = wm8350_bat_props;
-	battery->num_properties = ARRAY_SIZE(wm8350_bat_props);
-	battery->get_property = wm8350_bat_get_property;
-	battery->use_for_apm = 1;
-	ret = power_supply_register(&pdev->dev, battery);
-	if (ret)
+	power->battery = power_supply_register(&pdev->dev, &wm8350_battery_desc,
+					       NULL);
+	if (IS_ERR(power->battery)) {
+		ret = PTR_ERR(power->battery);
 		goto battery_failed;
+	}
 
-	usb->name = "wm8350-usb",
-	usb->type = POWER_SUPPLY_TYPE_USB;
-	usb->properties = wm8350_usb_props;
-	usb->num_properties = ARRAY_SIZE(wm8350_usb_props);
-	usb->get_property = wm8350_usb_get_prop;
-	ret = power_supply_register(&pdev->dev, usb);
-	if (ret)
+	power->usb = power_supply_register(&pdev->dev, &wm8350_usb_desc, NULL);
+	if (IS_ERR(power->usb)) {
+		ret = PTR_ERR(power->usb);
 		goto usb_failed;
+	}
 
 	ret = device_create_file(&pdev->dev, &dev_attr_charger_state);
 	if (ret < 0)
@@ -494,9 +505,9 @@
 	return ret;
 
 usb_failed:
-	power_supply_unregister(battery);
+	power_supply_unregister(power->battery);
 battery_failed:
-	power_supply_unregister(ac);
+	power_supply_unregister(power->ac);
 
 	return ret;
 }
@@ -508,9 +519,9 @@
 
 	free_charger_irq(wm8350);
 	device_remove_file(&pdev->dev, &dev_attr_charger_state);
-	power_supply_unregister(&power->battery);
-	power_supply_unregister(&power->ac);
-	power_supply_unregister(&power->usb);
+	power_supply_unregister(power->battery);
+	power_supply_unregister(power->ac);
+	power_supply_unregister(power->usb);
 	return 0;
 }
 
diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index a8e6203..c2f09ed 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -32,20 +32,20 @@
 
 static unsigned long wm97xx_read_bat(struct power_supply *bat_ps)
 {
-	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_pdata *wmdata = bat_ps->dev.parent->platform_data;
 	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
-	return wm97xx_read_aux_adc(dev_get_drvdata(bat_ps->dev->parent),
+	return wm97xx_read_aux_adc(dev_get_drvdata(bat_ps->dev.parent),
 					pdata->batt_aux) * pdata->batt_mult /
 					pdata->batt_div;
 }
 
 static unsigned long wm97xx_read_temp(struct power_supply *bat_ps)
 {
-	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_pdata *wmdata = bat_ps->dev.parent->platform_data;
 	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
-	return wm97xx_read_aux_adc(dev_get_drvdata(bat_ps->dev->parent),
+	return wm97xx_read_aux_adc(dev_get_drvdata(bat_ps->dev.parent),
 					pdata->temp_aux) * pdata->temp_mult /
 					pdata->temp_div;
 }
@@ -54,7 +54,7 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_pdata *wmdata = bat_ps->dev.parent->platform_data;
 	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
 	switch (psp) {
@@ -105,7 +105,7 @@
 static void wm97xx_bat_update(struct power_supply *bat_ps)
 {
 	int old_status = bat_status;
-	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_pdata *wmdata = bat_ps->dev.parent->platform_data;
 	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
 	mutex_lock(&work_lock);
@@ -117,7 +117,7 @@
 			POWER_SUPPLY_STATUS_UNKNOWN;
 
 	if (old_status != bat_status) {
-		pr_debug("%s: %i -> %i\n", bat_ps->name, old_status,
+		pr_debug("%s: %i -> %i\n", bat_ps->desc->name, old_status,
 					bat_status);
 		power_supply_changed(bat_ps);
 	}
@@ -125,7 +125,8 @@
 	mutex_unlock(&work_lock);
 }
 
-static struct power_supply bat_ps = {
+static struct power_supply *bat_psy;
+static struct power_supply_desc bat_psy_desc = {
 	.type			= POWER_SUPPLY_TYPE_BATTERY,
 	.get_property		= wm97xx_bat_get_property,
 	.external_power_changed = wm97xx_bat_external_power_changed,
@@ -134,7 +135,7 @@
 
 static void wm97xx_bat_work(struct work_struct *work)
 {
-	wm97xx_bat_update(&bat_ps);
+	wm97xx_bat_update(bat_psy);
 }
 
 static irqreturn_t wm97xx_chrg_irq(int irq, void *data)
@@ -237,18 +238,20 @@
 		dev_info(&dev->dev, "Please consider setting proper battery "
 				"name in platform definition file, falling "
 				"back to name \"wm97xx-batt\"\n");
-		bat_ps.name = "wm97xx-batt";
+		bat_psy_desc.name = "wm97xx-batt";
 	} else
-		bat_ps.name = pdata->batt_name;
+		bat_psy_desc.name = pdata->batt_name;
 
-	bat_ps.properties = prop;
-	bat_ps.num_properties = props;
+	bat_psy_desc.properties = prop;
+	bat_psy_desc.num_properties = props;
 
-	ret = power_supply_register(&dev->dev, &bat_ps);
-	if (!ret)
+	bat_psy = power_supply_register(&dev->dev, &bat_psy_desc, NULL);
+	if (!IS_ERR(bat_psy)) {
 		schedule_work(&bat_work);
-	else
+	} else {
+		ret = PTR_ERR(bat_psy);
 		goto err4;
+	}
 
 	return 0;
 err4:
@@ -273,7 +276,7 @@
 		gpio_free(pdata->charge_gpio);
 	}
 	cancel_work_sync(&bat_work);
-	power_supply_unregister(&bat_ps);
+	power_supply_unregister(bat_psy);
 	kfree(prop);
 	return 0;
 }
diff --git a/drivers/power/z2_battery.c b/drivers/power/z2_battery.c
index 814d2e3..b201e3f 100644
--- a/drivers/power/z2_battery.c
+++ b/drivers/power/z2_battery.c
@@ -21,12 +21,13 @@
 #define	Z2_DEFAULT_NAME	"Z2"
 
 struct z2_charger {
-	struct z2_battery_info	*info;
-	int			bat_status;
-	struct i2c_client	*client;
-	struct power_supply	batt_ps;
-	struct mutex		work_lock;
-	struct work_struct	bat_work;
+	struct z2_battery_info		*info;
+	int				bat_status;
+	struct i2c_client		*client;
+	struct power_supply		*batt_ps;
+	struct power_supply_desc	batt_ps_desc;
+	struct mutex			work_lock;
+	struct work_struct		bat_work;
 };
 
 static unsigned long z2_read_bat(struct z2_charger *charger)
@@ -44,8 +45,7 @@
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
-	struct z2_charger *charger = container_of(batt_ps, struct z2_charger,
-						batt_ps);
+	struct z2_charger *charger = power_supply_get_drvdata(batt_ps);
 	struct z2_battery_info *info = charger->info;
 
 	switch (psp) {
@@ -85,8 +85,8 @@
 
 static void z2_batt_ext_power_changed(struct power_supply *batt_ps)
 {
-	struct z2_charger *charger = container_of(batt_ps, struct z2_charger,
-						batt_ps);
+	struct z2_charger *charger = power_supply_get_drvdata(batt_ps);
+
 	schedule_work(&charger->bat_work);
 }
 
@@ -106,9 +106,10 @@
 		POWER_SUPPLY_STATUS_UNKNOWN;
 
 	if (old_status != charger->bat_status) {
-		pr_debug("%s: %i -> %i\n", charger->batt_ps.name, old_status,
-			charger->bat_status);
-		power_supply_changed(&charger->batt_ps);
+		pr_debug("%s: %i -> %i\n", charger->batt_ps->desc->name,
+				old_status,
+				charger->bat_status);
+		power_supply_changed(charger->batt_ps);
 	}
 
 	mutex_unlock(&charger->work_lock);
@@ -166,16 +167,17 @@
 				"Please consider setting proper battery "
 				"name in platform definition file, falling "
 				"back to name \" Z2_DEFAULT_NAME \"\n");
-		charger->batt_ps.name = Z2_DEFAULT_NAME;
+		charger->batt_ps_desc.name = Z2_DEFAULT_NAME;
 	} else
-		charger->batt_ps.name = info->batt_name;
+		charger->batt_ps_desc.name = info->batt_name;
 
-	charger->batt_ps.properties		= prop;
-	charger->batt_ps.num_properties		= props;
-	charger->batt_ps.type			= POWER_SUPPLY_TYPE_BATTERY;
-	charger->batt_ps.get_property		= z2_batt_get_property;
-	charger->batt_ps.external_power_changed	= z2_batt_ext_power_changed;
-	charger->batt_ps.use_for_apm		= 1;
+	charger->batt_ps_desc.properties	= prop;
+	charger->batt_ps_desc.num_properties	= props;
+	charger->batt_ps_desc.type		= POWER_SUPPLY_TYPE_BATTERY;
+	charger->batt_ps_desc.get_property	= z2_batt_get_property;
+	charger->batt_ps_desc.external_power_changed =
+						z2_batt_ext_power_changed;
+	charger->batt_ps_desc.use_for_apm	= 1;
 
 	return 0;
 }
@@ -187,6 +189,7 @@
 	int props = 1;	/* POWER_SUPPLY_PROP_PRESENT */
 	struct z2_charger *charger;
 	struct z2_battery_info *info = client->dev.platform_data;
+	struct power_supply_config psy_cfg = {};
 
 	if (info == NULL) {
 		dev_err(&client->dev,
@@ -203,6 +206,7 @@
 	charger->info = info;
 	charger->client = client;
 	i2c_set_clientdata(client, charger);
+	psy_cfg.drv_data = charger;
 
 	mutex_init(&charger->work_lock);
 
@@ -230,16 +234,20 @@
 
 	INIT_WORK(&charger->bat_work, z2_batt_work);
 
-	ret = power_supply_register(&client->dev, &charger->batt_ps);
-	if (ret)
+	charger->batt_ps = power_supply_register(&client->dev,
+						 &charger->batt_ps_desc,
+						 &psy_cfg);
+	if (IS_ERR(charger->batt_ps)) {
+		ret = PTR_ERR(charger->batt_ps);
 		goto err4;
+	}
 
 	schedule_work(&charger->bat_work);
 
 	return 0;
 
 err4:
-	kfree(charger->batt_ps.properties);
+	kfree(charger->batt_ps_desc.properties);
 err3:
 	if (info->charge_gpio >= 0 && gpio_is_valid(info->charge_gpio))
 		free_irq(gpio_to_irq(info->charge_gpio), charger);
@@ -257,9 +265,9 @@
 	struct z2_battery_info *info = charger->info;
 
 	cancel_work_sync(&charger->bat_work);
-	power_supply_unregister(&charger->batt_ps);
+	power_supply_unregister(charger->batt_ps);
 
-	kfree(charger->batt_ps.properties);
+	kfree(charger->batt_ps_desc.properties);
 	if (info->charge_gpio >= 0 && gpio_is_valid(info->charge_gpio)) {
 		free_irq(gpio_to_irq(info->charge_gpio), charger);
 		gpio_free(info->charge_gpio);
diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c
index 9eec453..2ff73d7 100644
--- a/drivers/regulator/act8865-regulator.c
+++ b/drivers/regulator/act8865-regulator.c
@@ -29,6 +29,35 @@
 #include <linux/regmap.h>
 
 /*
+ * ACT8600 Global Register Map.
+ */
+#define ACT8600_SYS_MODE	0x00
+#define ACT8600_SYS_CTRL	0x01
+#define ACT8600_DCDC1_VSET	0x10
+#define ACT8600_DCDC1_CTRL	0x12
+#define ACT8600_DCDC2_VSET	0x20
+#define ACT8600_DCDC2_CTRL	0x22
+#define ACT8600_DCDC3_VSET	0x30
+#define ACT8600_DCDC3_CTRL	0x32
+#define ACT8600_SUDCDC4_VSET	0x40
+#define ACT8600_SUDCDC4_CTRL	0x41
+#define ACT8600_LDO5_VSET	0x50
+#define ACT8600_LDO5_CTRL	0x51
+#define ACT8600_LDO6_VSET	0x60
+#define ACT8600_LDO6_CTRL	0x61
+#define ACT8600_LDO7_VSET	0x70
+#define ACT8600_LDO7_CTRL	0x71
+#define ACT8600_LDO8_VSET	0x80
+#define ACT8600_LDO8_CTRL	0x81
+#define ACT8600_LDO910_CTRL	0x91
+#define ACT8600_APCH0		0xA1
+#define ACT8600_APCH1		0xA8
+#define ACT8600_APCH2		0xA9
+#define ACT8600_APCH_STAT	0xAA
+#define ACT8600_OTG0		0xB0
+#define ACT8600_OTG1		0xB2
+
+/*
  * ACT8846 Global Register Map.
  */
 #define	ACT8846_SYS0		0x00
@@ -94,10 +123,15 @@
 #define	ACT8865_ENA		0x80	/* ON - [7] */
 #define	ACT8865_VSEL_MASK	0x3F	/* VSET - [5:0] */
 
+
+#define ACT8600_LDO10_ENA		0x40	/* ON - [6] */
+#define ACT8600_SUDCDC_VSEL_MASK	0xFF	/* SUDCDC VSET - [7:0] */
+
 /*
  * ACT8865 voltage number
  */
 #define	ACT8865_VOLTAGE_NUM	64
+#define ACT8600_SUDCDC_VOLTAGE_NUM	255
 
 struct act8865 {
 	struct regmap *regmap;
@@ -116,6 +150,13 @@
 	REGULATOR_LINEAR_RANGE(2400000, 48, 63, 100000),
 };
 
+static const struct regulator_linear_range act8600_sudcdc_voltage_ranges[] = {
+	REGULATOR_LINEAR_RANGE(3000000, 0, 63, 0),
+	REGULATOR_LINEAR_RANGE(3000000, 64, 159, 100000),
+	REGULATOR_LINEAR_RANGE(12600000, 160, 191, 200000),
+	REGULATOR_LINEAR_RANGE(19000000, 191, 255, 400000),
+};
+
 static struct regulator_ops act8865_ops = {
 	.list_voltage		= regulator_list_voltage_linear_range,
 	.map_voltage		= regulator_map_voltage_linear_range,
@@ -126,9 +167,16 @@
 	.is_enabled		= regulator_is_enabled_regmap,
 };
 
-#define ACT88xx_REG(_name, _family, _id, _vsel_reg)			\
+static struct regulator_ops act8865_ldo_ops = {
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.is_enabled		= regulator_is_enabled_regmap,
+};
+
+#define ACT88xx_REG(_name, _family, _id, _vsel_reg, _supply)		\
 	[_family##_ID_##_id] = {					\
 		.name			= _name,			\
+		.supply_name		= _supply,			\
 		.id			= _family##_ID_##_id,		\
 		.type			= REGULATOR_VOLTAGE,		\
 		.ops			= &act8865_ops,			\
@@ -142,33 +190,80 @@
 		.owner			= THIS_MODULE,			\
 	}
 
+static const struct regulator_desc act8600_regulators[] = {
+	ACT88xx_REG("DCDC1", ACT8600, DCDC1, VSET, "vp1"),
+	ACT88xx_REG("DCDC2", ACT8600, DCDC2, VSET, "vp2"),
+	ACT88xx_REG("DCDC3", ACT8600, DCDC3, VSET, "vp3"),
+	{
+		.name = "SUDCDC_REG4",
+		.id = ACT8600_ID_SUDCDC4,
+		.ops = &act8865_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = ACT8600_SUDCDC_VOLTAGE_NUM,
+		.linear_ranges = act8600_sudcdc_voltage_ranges,
+		.n_linear_ranges = ARRAY_SIZE(act8600_sudcdc_voltage_ranges),
+		.vsel_reg = ACT8600_SUDCDC4_VSET,
+		.vsel_mask = ACT8600_SUDCDC_VSEL_MASK,
+		.enable_reg = ACT8600_SUDCDC4_CTRL,
+		.enable_mask = ACT8865_ENA,
+		.owner = THIS_MODULE,
+	},
+	ACT88xx_REG("LDO5", ACT8600, LDO5, VSET, "inl"),
+	ACT88xx_REG("LDO6", ACT8600, LDO6, VSET, "inl"),
+	ACT88xx_REG("LDO7", ACT8600, LDO7, VSET, "inl"),
+	ACT88xx_REG("LDO8", ACT8600, LDO8, VSET, "inl"),
+	{
+		.name = "LDO_REG9",
+		.id = ACT8600_ID_LDO9,
+		.ops = &act8865_ldo_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = 1,
+		.fixed_uV = 1800000,
+		.enable_reg = ACT8600_LDO910_CTRL,
+		.enable_mask = ACT8865_ENA,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO_REG10",
+		.id = ACT8600_ID_LDO10,
+		.ops = &act8865_ldo_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = 1,
+		.fixed_uV = 1200000,
+		.enable_reg = ACT8600_LDO910_CTRL,
+		.enable_mask = ACT8600_LDO10_ENA,
+		.owner = THIS_MODULE,
+	},
+};
+
 static const struct regulator_desc act8846_regulators[] = {
-	ACT88xx_REG("REG1", ACT8846, REG1, VSET),
-	ACT88xx_REG("REG2", ACT8846, REG2, VSET0),
-	ACT88xx_REG("REG3", ACT8846, REG3, VSET0),
-	ACT88xx_REG("REG4", ACT8846, REG4, VSET0),
-	ACT88xx_REG("REG5", ACT8846, REG5, VSET),
-	ACT88xx_REG("REG6", ACT8846, REG6, VSET),
-	ACT88xx_REG("REG7", ACT8846, REG7, VSET),
-	ACT88xx_REG("REG8", ACT8846, REG8, VSET),
-	ACT88xx_REG("REG9", ACT8846, REG9, VSET),
-	ACT88xx_REG("REG10", ACT8846, REG10, VSET),
-	ACT88xx_REG("REG11", ACT8846, REG11, VSET),
-	ACT88xx_REG("REG12", ACT8846, REG12, VSET),
+	ACT88xx_REG("REG1", ACT8846, REG1, VSET, "vp1"),
+	ACT88xx_REG("REG2", ACT8846, REG2, VSET0, "vp2"),
+	ACT88xx_REG("REG3", ACT8846, REG3, VSET0, "vp3"),
+	ACT88xx_REG("REG4", ACT8846, REG4, VSET0, "vp4"),
+	ACT88xx_REG("REG5", ACT8846, REG5, VSET, "inl1"),
+	ACT88xx_REG("REG6", ACT8846, REG6, VSET, "inl1"),
+	ACT88xx_REG("REG7", ACT8846, REG7, VSET, "inl1"),
+	ACT88xx_REG("REG8", ACT8846, REG8, VSET, "inl2"),
+	ACT88xx_REG("REG9", ACT8846, REG9, VSET, "inl2"),
+	ACT88xx_REG("REG10", ACT8846, REG10, VSET, "inl3"),
+	ACT88xx_REG("REG11", ACT8846, REG11, VSET, "inl3"),
+	ACT88xx_REG("REG12", ACT8846, REG12, VSET, "inl3"),
 };
 
 static const struct regulator_desc act8865_regulators[] = {
-	ACT88xx_REG("DCDC_REG1", ACT8865, DCDC1, VSET1),
-	ACT88xx_REG("DCDC_REG2", ACT8865, DCDC2, VSET1),
-	ACT88xx_REG("DCDC_REG3", ACT8865, DCDC3, VSET1),
-	ACT88xx_REG("LDO_REG1", ACT8865, LDO1, VSET),
-	ACT88xx_REG("LDO_REG2", ACT8865, LDO2, VSET),
-	ACT88xx_REG("LDO_REG3", ACT8865, LDO3, VSET),
-	ACT88xx_REG("LDO_REG4", ACT8865, LDO4, VSET),
+	ACT88xx_REG("DCDC_REG1", ACT8865, DCDC1, VSET1, "vp1"),
+	ACT88xx_REG("DCDC_REG2", ACT8865, DCDC2, VSET1, "vp2"),
+	ACT88xx_REG("DCDC_REG3", ACT8865, DCDC3, VSET1, "vp3"),
+	ACT88xx_REG("LDO_REG1", ACT8865, LDO1, VSET, "inl45"),
+	ACT88xx_REG("LDO_REG2", ACT8865, LDO2, VSET, "inl45"),
+	ACT88xx_REG("LDO_REG3", ACT8865, LDO3, VSET, "inl67"),
+	ACT88xx_REG("LDO_REG4", ACT8865, LDO4, VSET, "inl67"),
 };
 
 #ifdef CONFIG_OF
 static const struct of_device_id act8865_dt_ids[] = {
+	{ .compatible = "active-semi,act8600", .data = (void *)ACT8600 },
 	{ .compatible = "active-semi,act8846", .data = (void *)ACT8846 },
 	{ .compatible = "active-semi,act8865", .data = (void *)ACT8865 },
 	{ }
@@ -200,6 +295,19 @@
 	[ACT8865_ID_LDO4]	= { .name = "LDO_REG4"},
 };
 
+static struct of_regulator_match act8600_matches[] = {
+	[ACT8600_ID_DCDC1]	= { .name = "DCDC_REG1"},
+	[ACT8600_ID_DCDC2]	= { .name = "DCDC_REG2"},
+	[ACT8600_ID_DCDC3]	= { .name = "DCDC_REG3"},
+	[ACT8600_ID_SUDCDC4]	= { .name = "SUDCDC_REG4"},
+	[ACT8600_ID_LDO5]	= { .name = "LDO_REG5"},
+	[ACT8600_ID_LDO6]	= { .name = "LDO_REG6"},
+	[ACT8600_ID_LDO7]	= { .name = "LDO_REG7"},
+	[ACT8600_ID_LDO8]	= { .name = "LDO_REG8"},
+	[ACT8600_ID_LDO9]	= { .name = "LDO_REG9"},
+	[ACT8600_ID_LDO10]	= { .name = "LDO_REG10"},
+};
+
 static int act8865_pdata_from_dt(struct device *dev,
 				 struct device_node **of_node,
 				 struct act8865_platform_data *pdata,
@@ -217,6 +325,10 @@
 	}
 
 	switch (type) {
+	case ACT8600:
+		matches = act8600_matches;
+		num_matches = ARRAY_SIZE(act8600_matches);
+		break;
 	case ACT8846:
 		matches = act8846_matches;
 		num_matches = ARRAY_SIZE(act8846_matches);
@@ -317,6 +429,12 @@
 	}
 
 	switch (type) {
+	case ACT8600:
+		regulators = act8600_regulators;
+		num_regulators = ARRAY_SIZE(act8600_regulators);
+		off_reg = -1;
+		off_mask = -1;
+		break;
 	case ACT8846:
 		regulators = act8846_regulators;
 		num_regulators = ARRAY_SIZE(act8846_regulators);
@@ -366,7 +484,7 @@
 	}
 
 	if (of_device_is_system_power_controller(dev->of_node)) {
-		if (!pm_power_off) {
+		if (!pm_power_off && (off_reg > 0)) {
 			act8865_i2c_client = client;
 			act8865->off_reg = off_reg;
 			act8865->off_mask = off_mask;
@@ -402,6 +520,7 @@
 }
 
 static const struct i2c_device_id act8865_ids[] = {
+	{ .name = "act8600", .driver_data = ACT8600 },
 	{ .name = "act8846", .driver_data = ACT8846 },
 	{ .name = "act8865", .driver_data = ACT8865 },
 	{ },
diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c
index 8169165..a1d07d3 100644
--- a/drivers/regulator/arizona-ldo1.c
+++ b/drivers/regulator/arizona-ldo1.c
@@ -282,6 +282,9 @@
 		arizona->external_dcvdd = true;
 
 	ldo1->regulator = devm_regulator_register(&pdev->dev, desc, &config);
+
+	of_node_put(config.of_node);
+
 	if (IS_ERR(ldo1->regulator)) {
 		ret = PTR_ERR(ldo1->regulator);
 		dev_err(arizona->dev, "Failed to register LDO1 supply: %d\n",
@@ -289,8 +292,6 @@
 		return ret;
 	}
 
-	of_node_put(config.of_node);
-
 	platform_set_drvdata(pdev, ldo1);
 
 	return 0;
diff --git a/drivers/regulator/arizona-micsupp.c b/drivers/regulator/arizona-micsupp.c
index 2007900..4116e74 100644
--- a/drivers/regulator/arizona-micsupp.c
+++ b/drivers/regulator/arizona-micsupp.c
@@ -284,6 +284,9 @@
 	micsupp->regulator = devm_regulator_register(&pdev->dev,
 						     desc,
 						     &config);
+
+	of_node_put(config.of_node);
+
 	if (IS_ERR(micsupp->regulator)) {
 		ret = PTR_ERR(micsupp->regulator);
 		dev_err(arizona->dev, "Failed to register mic supply: %d\n",
@@ -291,8 +294,6 @@
 		return ret;
 	}
 
-	of_node_put(config.of_node);
-
 	platform_set_drvdata(pdev, micsupp);
 
 	return 0;
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a4a8a6d..443eaab 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -648,10 +648,12 @@
 	if (err < 0)
 		return 0;
 
-	if (!rdev->desc->ops->get_optimum_mode)
+	if (!rdev->desc->ops->get_optimum_mode &&
+	    !rdev->desc->ops->set_load)
 		return 0;
 
-	if (!rdev->desc->ops->set_mode)
+	if (!rdev->desc->ops->set_mode &&
+	    !rdev->desc->ops->set_load)
 		return -EINVAL;
 
 	/* get output voltage */
@@ -676,22 +678,29 @@
 	list_for_each_entry(sibling, &rdev->consumer_list, list)
 		current_uA += sibling->uA_load;
 
-	/* now get the optimum mode for our new total regulator load */
-	mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
-						  output_uV, current_uA);
+	if (rdev->desc->ops->set_load) {
+		/* set the optimum mode for our new total regulator load */
+		err = rdev->desc->ops->set_load(rdev, current_uA);
+		if (err < 0)
+			rdev_err(rdev, "failed to set load %d\n", current_uA);
+	} else {
+		/* now get the optimum mode for our new total regulator load */
+		mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
+							 output_uV, current_uA);
 
-	/* check the new mode is allowed */
-	err = regulator_mode_constrain(rdev, &mode);
-	if (err < 0) {
-		rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV\n",
-			 current_uA, input_uV, output_uV);
-		return err;
+		/* check the new mode is allowed */
+		err = regulator_mode_constrain(rdev, &mode);
+		if (err < 0) {
+			rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV\n",
+				 current_uA, input_uV, output_uV);
+			return err;
+		}
+
+		err = rdev->desc->ops->set_mode(rdev, mode);
+		if (err < 0)
+			rdev_err(rdev, "failed to set optimum mode %x\n", mode);
 	}
 
-	err = rdev->desc->ops->set_mode(rdev, mode);
-	if (err < 0)
-		rdev_err(rdev, "failed to set optimum mode %x\n", mode);
-
 	return err;
 }
 
@@ -1316,6 +1325,54 @@
 	return NULL;
 }
 
+static int regulator_resolve_supply(struct regulator_dev *rdev)
+{
+	struct regulator_dev *r;
+	struct device *dev = rdev->dev.parent;
+	int ret;
+
+	/* No supply to resovle? */
+	if (!rdev->supply_name)
+		return 0;
+
+	/* Supply already resolved? */
+	if (rdev->supply)
+		return 0;
+
+	r = regulator_dev_lookup(dev, rdev->supply_name, &ret);
+	if (ret == -ENODEV) {
+		/*
+		 * No supply was specified for this regulator and
+		 * there will never be one.
+		 */
+		return 0;
+	}
+
+	if (!r) {
+		dev_err(dev, "Failed to resolve %s-supply for %s\n",
+			rdev->supply_name, rdev->desc->name);
+		return -EPROBE_DEFER;
+	}
+
+	/* Recursively resolve the supply of the supply */
+	ret = regulator_resolve_supply(r);
+	if (ret < 0)
+		return ret;
+
+	ret = set_supply(rdev, r);
+	if (ret < 0)
+		return ret;
+
+	/* Cascade always-on state to supply */
+	if (_regulator_is_enabled(rdev)) {
+		ret = regulator_enable(rdev->supply);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* Internal regulator request function */
 static struct regulator *_regulator_get(struct device *dev, const char *id,
 					bool exclusive, bool allow_dummy)
@@ -1385,6 +1442,12 @@
 		goto out;
 	}
 
+	ret = regulator_resolve_supply(rdev);
+	if (ret < 0) {
+		regulator = ERR_PTR(ret);
+		goto out;
+	}
+
 	if (!try_module_get(rdev->owner))
 		goto out;
 
@@ -2998,7 +3061,7 @@
 EXPORT_SYMBOL_GPL(regulator_get_mode);
 
 /**
- * regulator_set_optimum_mode - set regulator optimum operating mode
+ * regulator_set_load - set regulator load
  * @regulator: regulator source
  * @uA_load: load current
  *
@@ -3021,9 +3084,9 @@
  * DRMS will sum the total requested load on the regulator and change
  * to the most efficient operating mode if platform constraints allow.
  *
- * Returns the new regulator mode or error.
+ * On error a negative errno is returned.
  */
-int regulator_set_optimum_mode(struct regulator *regulator, int uA_load)
+int regulator_set_load(struct regulator *regulator, int uA_load)
 {
 	struct regulator_dev *rdev = regulator->rdev;
 	int ret;
@@ -3035,7 +3098,7 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(regulator_set_optimum_mode);
+EXPORT_SYMBOL_GPL(regulator_set_load);
 
 /**
  * regulator_allow_bypass - allow the regulator to go into bypass mode
@@ -3499,7 +3562,18 @@
 
 static void rdev_init_debugfs(struct regulator_dev *rdev)
 {
-	rdev->debugfs = debugfs_create_dir(rdev_get_name(rdev), debugfs_root);
+	struct device *parent = rdev->dev.parent;
+	const char *rname = rdev_get_name(rdev);
+	char name[NAME_MAX];
+
+	/* Avoid duplicate debugfs directory names */
+	if (parent && rname == rdev->desc->name) {
+		snprintf(name, sizeof(name), "%s-%s", dev_name(parent),
+			 rname);
+		rname = name;
+	}
+
+	rdev->debugfs = debugfs_create_dir(rname, debugfs_root);
 	if (!rdev->debugfs) {
 		rdev_warn(rdev, "Failed to create debugfs directory\n");
 		return;
@@ -3533,7 +3607,6 @@
 	struct regulator_dev *rdev;
 	struct device *dev;
 	int ret, i;
-	const char *supply = NULL;
 
 	if (regulator_desc == NULL || cfg == NULL)
 		return ERR_PTR(-EINVAL);
@@ -3641,41 +3714,10 @@
 		goto scrub;
 
 	if (init_data && init_data->supply_regulator)
-		supply = init_data->supply_regulator;
+		rdev->supply_name = init_data->supply_regulator;
 	else if (regulator_desc->supply_name)
-		supply = regulator_desc->supply_name;
+		rdev->supply_name = regulator_desc->supply_name;
 
-	if (supply) {
-		struct regulator_dev *r;
-
-		r = regulator_dev_lookup(dev, supply, &ret);
-
-		if (ret == -ENODEV) {
-			/*
-			 * No supply was specified for this regulator and
-			 * there will never be one.
-			 */
-			ret = 0;
-			goto add_dev;
-		} else if (!r) {
-			dev_err(dev, "Failed to find supply %s\n", supply);
-			ret = -EPROBE_DEFER;
-			goto scrub;
-		}
-
-		ret = set_supply(rdev, r);
-		if (ret < 0)
-			goto scrub;
-
-		/* Enable supply if rail is enabled */
-		if (_regulator_is_enabled(rdev)) {
-			ret = regulator_enable(rdev->supply);
-			if (ret < 0)
-				goto scrub;
-		}
-	}
-
-add_dev:
 	/* add consumers devices */
 	if (init_data) {
 		for (i = 0; i < init_data->num_consumer_supplies; i++) {
@@ -3702,8 +3744,6 @@
 	unset_regulator_supplies(rdev);
 
 scrub:
-	if (rdev->supply)
-		_regulator_put(rdev->supply);
 	regulator_ena_gpio_free(rdev);
 	kfree(rdev->constraints);
 wash:
@@ -3936,6 +3976,110 @@
 #endif
 };
 
+#ifdef CONFIG_DEBUG_FS
+static void regulator_summary_show_subtree(struct seq_file *s,
+					   struct regulator_dev *rdev,
+					   int level)
+{
+	struct list_head *list = s->private;
+	struct regulator_dev *child;
+	struct regulation_constraints *c;
+	struct regulator *consumer;
+
+	if (!rdev)
+		return;
+
+	seq_printf(s, "%*s%-*s %3d %4d %6d ",
+		   level * 3 + 1, "",
+		   30 - level * 3, rdev_get_name(rdev),
+		   rdev->use_count, rdev->open_count, rdev->bypass_count);
+
+	seq_printf(s, "%5dmV ", _regulator_get_voltage(rdev) / 1000);
+	seq_printf(s, "%5dmA ", _regulator_get_current_limit(rdev) / 1000);
+
+	c = rdev->constraints;
+	if (c) {
+		switch (rdev->desc->type) {
+		case REGULATOR_VOLTAGE:
+			seq_printf(s, "%5dmV %5dmV ",
+				   c->min_uV / 1000, c->max_uV / 1000);
+			break;
+		case REGULATOR_CURRENT:
+			seq_printf(s, "%5dmA %5dmA ",
+				   c->min_uA / 1000, c->max_uA / 1000);
+			break;
+		}
+	}
+
+	seq_puts(s, "\n");
+
+	list_for_each_entry(consumer, &rdev->consumer_list, list) {
+		if (consumer->dev->class == &regulator_class)
+			continue;
+
+		seq_printf(s, "%*s%-*s ",
+			   (level + 1) * 3 + 1, "",
+			   30 - (level + 1) * 3, dev_name(consumer->dev));
+
+		switch (rdev->desc->type) {
+		case REGULATOR_VOLTAGE:
+			seq_printf(s, "%37dmV %5dmV",
+				   consumer->min_uV / 1000,
+				   consumer->max_uV / 1000);
+			break;
+		case REGULATOR_CURRENT:
+			break;
+		}
+
+		seq_puts(s, "\n");
+	}
+
+	list_for_each_entry(child, list, list) {
+		/* handle only non-root regulators supplied by current rdev */
+		if (!child->supply || child->supply->rdev != rdev)
+			continue;
+
+		regulator_summary_show_subtree(s, child, level + 1);
+	}
+}
+
+static int regulator_summary_show(struct seq_file *s, void *data)
+{
+	struct list_head *list = s->private;
+	struct regulator_dev *rdev;
+
+	seq_puts(s, " regulator                      use open bypass voltage current     min     max\n");
+	seq_puts(s, "-------------------------------------------------------------------------------\n");
+
+	mutex_lock(&regulator_list_mutex);
+
+	list_for_each_entry(rdev, list, list) {
+		if (rdev->supply)
+			continue;
+
+		regulator_summary_show_subtree(s, rdev, 0);
+	}
+
+	mutex_unlock(&regulator_list_mutex);
+
+	return 0;
+}
+
+static int regulator_summary_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, regulator_summary_show, inode->i_private);
+}
+#endif
+
+static const struct file_operations regulator_summary_fops = {
+#ifdef CONFIG_DEBUG_FS
+	.open		= regulator_summary_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+#endif
+};
+
 static int __init regulator_init(void)
 {
 	int ret;
@@ -3949,6 +4093,9 @@
 	debugfs_create_file("supply_map", 0444, debugfs_root, NULL,
 			    &supply_map_fops);
 
+	debugfs_create_file("regulator_summary", 0444, debugfs_root,
+			    &regulator_list, &regulator_summary_fops);
+
 	regulator_dummy_init();
 
 	return ret;
diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
index 0134341..df79e4b 100644
--- a/drivers/regulator/da9211-regulator.c
+++ b/drivers/regulator/da9211-regulator.c
@@ -305,8 +305,7 @@
 
 	if (reg_val & DA9211_E_OV_CURR_A) {
 		regulator_notifier_call_chain(chip->rdev[0],
-			REGULATOR_EVENT_OVER_CURRENT,
-			rdev_get_drvdata(chip->rdev[0]));
+			REGULATOR_EVENT_OVER_CURRENT, NULL);
 
 		err = regmap_write(chip->regmap, DA9211_REG_EVENT_B,
 			DA9211_E_OV_CURR_A);
@@ -318,8 +317,7 @@
 
 	if (reg_val & DA9211_E_OV_CURR_B) {
 		regulator_notifier_call_chain(chip->rdev[1],
-			REGULATOR_EVENT_OVER_CURRENT,
-			rdev_get_drvdata(chip->rdev[1]));
+			REGULATOR_EVENT_OVER_CURRENT, NULL);
 
 		err = regmap_write(chip->regmap, DA9211_REG_EVENT_B,
 			DA9211_E_OV_CURR_B);
@@ -344,7 +342,7 @@
 
 	ret = regmap_read(chip->regmap, DA9211_REG_CONFIG_E, &data);
 	if (ret < 0) {
-		dev_err(chip->dev, "Failed to read CONTROL_E reg: %d\n", ret);
+		dev_err(chip->dev, "Failed to read CONFIG_E reg: %d\n", ret);
 		return ret;
 	}
 
diff --git a/drivers/regulator/dbx500-prcmu.c b/drivers/regulator/dbx500-prcmu.c
index 2d16b9f..3963dfa 100644
--- a/drivers/regulator/dbx500-prcmu.c
+++ b/drivers/regulator/dbx500-prcmu.c
@@ -95,14 +95,9 @@
 
 static int ux500_regulator_power_state_cnt_print(struct seq_file *s, void *p)
 {
-	struct device *dev = s->private;
-	int err;
-
 	/* print power state count */
-	err = seq_printf(s, "ux500-regulator power state count: %i\n",
-		power_state_active_get());
-	if (err < 0)
-		dev_err(dev, "seq_printf overflow\n");
+	seq_printf(s, "ux500-regulator power state count: %i\n",
+		   power_state_active_get());
 
 	return 0;
 }
@@ -124,19 +119,11 @@
 
 static int ux500_regulator_status_print(struct seq_file *s, void *p)
 {
-	struct device *dev = s->private;
-	int err;
 	int i;
 
 	/* print dump header */
-	err = seq_puts(s, "ux500-regulator status:\n");
-	if (err < 0)
-		dev_err(dev, "seq_puts overflow\n");
-
-	err = seq_printf(s, "%31s : %8s : %8s\n", "current",
-		"before", "after");
-	if (err < 0)
-		dev_err(dev, "seq_printf overflow\n");
+	seq_puts(s, "ux500-regulator status:\n");
+	seq_printf(s, "%31s : %8s : %8s\n", "current", "before", "after");
 
 	for (i = 0; i < rdebug.num_regulators; i++) {
 		struct dbx500_regulator_info *info;
@@ -144,12 +131,11 @@
 		info = &rdebug.regulator_array[i];
 
 		/* print status */
-		err = seq_printf(s, "%20s : %8s : %8s : %8s\n", info->desc.name,
-			info->is_enabled ? "enabled" : "disabled",
-			rdebug.state_before_suspend[i] ? "enabled" : "disabled",
-			rdebug.state_after_suspend[i] ? "enabled" : "disabled");
-		if (err < 0)
-			dev_err(dev, "seq_printf overflow\n");
+		seq_printf(s, "%20s : %8s : %8s : %8s\n",
+			   info->desc.name,
+			   info->is_enabled ? "enabled" : "disabled",
+			   rdebug.state_before_suspend[i] ? "enabled" : "disabled",
+			   rdebug.state_after_suspend[i] ? "enabled" : "disabled");
 	}
 
 	return 0;
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index 8f785bc..6ec1d40 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -413,3 +413,88 @@
 		devm_regulator_unregister_supply_alias(dev, id[i]);
 }
 EXPORT_SYMBOL_GPL(devm_regulator_bulk_unregister_supply_alias);
+
+struct regulator_notifier_match {
+	struct regulator *regulator;
+	struct notifier_block *nb;
+};
+
+static int devm_regulator_match_notifier(struct device *dev, void *res,
+					 void *data)
+{
+	struct regulator_notifier_match *match = res;
+	struct regulator_notifier_match *target = data;
+
+	return match->regulator == target->regulator && match->nb == target->nb;
+}
+
+static void devm_regulator_destroy_notifier(struct device *dev, void *res)
+{
+	struct regulator_notifier_match *match = res;
+
+	regulator_unregister_notifier(match->regulator, match->nb);
+}
+
+/**
+ * devm_regulator_register_notifier - Resource managed
+ * regulator_register_notifier
+ *
+ * @regulator: regulator source
+ * @nb: notifier block
+ *
+ * The notifier will be registers under the consumer device and be
+ * automatically be unregistered when the source device is unbound.
+ */
+int devm_regulator_register_notifier(struct regulator *regulator,
+				     struct notifier_block *nb)
+{
+	struct regulator_notifier_match *match;
+	int ret;
+
+	match = devres_alloc(devm_regulator_destroy_notifier,
+			     sizeof(struct regulator_notifier_match),
+			     GFP_KERNEL);
+	if (!match)
+		return -ENOMEM;
+
+	match->regulator = regulator;
+	match->nb = nb;
+
+	ret = regulator_register_notifier(regulator, nb);
+	if (ret < 0) {
+		devres_free(match);
+		return ret;
+	}
+
+	devres_add(regulator->dev, match);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_regulator_register_notifier);
+
+/**
+ * devm_regulator_unregister_notifier - Resource managed
+ * regulator_unregister_notifier()
+ *
+ * @regulator: regulator source
+ * @nb: notifier block
+ *
+ * Unregister a notifier registered with devm_regulator_register_notifier().
+ * Normally this function will not need to be called and the resource
+ * management code will ensure that the resource is freed.
+ */
+void devm_regulator_unregister_notifier(struct regulator *regulator,
+					struct notifier_block *nb)
+{
+	struct regulator_notifier_match match;
+	int rc;
+
+	match.regulator = regulator;
+	match.nb = nb;
+
+	rc = devres_release(regulator->dev, devm_regulator_destroy_notifier,
+			    devm_regulator_match_notifier, &match);
+	if (rc != 0)
+		WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_regulator_unregister_notifier);
diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693.c
index 07b313e..9665a48 100644
--- a/drivers/regulator/max77693.c
+++ b/drivers/regulator/max77693.c
@@ -128,6 +128,8 @@
 #define regulator_desc_esafeout(_num)	{			\
 	.name		= "ESAFEOUT"#_num,			\
 	.id		= MAX77693_ESAFEOUT##_num,		\
+	.of_match	= of_match_ptr("ESAFEOUT"#_num),	\
+	.regulators_node	= of_match_ptr("regulators"),	\
 	.n_voltages	= 4,					\
 	.ops		= &max77693_safeout_ops,		\
 	.type		= REGULATOR_VOLTAGE,			\
@@ -145,6 +147,8 @@
 	{
 		.name = "CHARGER",
 		.id = MAX77693_CHARGER,
+		.of_match = of_match_ptr("CHARGER"),
+		.regulators_node = of_match_ptr("regulators"),
 		.ops = &max77693_charger_ops,
 		.type = REGULATOR_CURRENT,
 		.owner = THIS_MODULE,
@@ -154,102 +158,23 @@
 	},
 };
 
-#ifdef CONFIG_OF
-static int max77693_pmic_dt_parse_rdata(struct device *dev,
-					struct max77693_regulator_data **rdata)
-{
-	struct device_node *np;
-	struct of_regulator_match *rmatch;
-	struct max77693_regulator_data *tmp;
-	int i, matched = 0;
-
-	np = of_get_child_by_name(dev->parent->of_node, "regulators");
-	if (!np)
-		return -EINVAL;
-
-	rmatch = devm_kzalloc(dev,
-		 sizeof(*rmatch) * ARRAY_SIZE(regulators), GFP_KERNEL);
-	if (!rmatch) {
-		of_node_put(np);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(regulators); i++)
-		rmatch[i].name = regulators[i].name;
-
-	matched = of_regulator_match(dev, np, rmatch, ARRAY_SIZE(regulators));
-	of_node_put(np);
-	if (matched <= 0)
-		return matched;
-	*rdata = devm_kzalloc(dev, sizeof(**rdata) * matched, GFP_KERNEL);
-	if (!(*rdata))
-		return -ENOMEM;
-
-	tmp = *rdata;
-
-	for (i = 0; i < matched; i++) {
-		tmp->initdata = rmatch[i].init_data;
-		tmp->of_node = rmatch[i].of_node;
-		tmp->id = regulators[i].id;
-		tmp++;
-	}
-
-	return matched;
-}
-#else
-static int max77693_pmic_dt_parse_rdata(struct device *dev,
-					struct max77693_regulator_data **rdata)
-{
-	return 0;
-}
-#endif /* CONFIG_OF */
-
-static int max77693_pmic_init_rdata(struct device *dev,
-				    struct max77693_regulator_data **rdata)
-{
-	struct max77693_platform_data *pdata;
-	int num_regulators = 0;
-
-	pdata = dev_get_platdata(dev->parent);
-	if (pdata) {
-		*rdata = pdata->regulators;
-		num_regulators = pdata->num_regulators;
-	}
-
-	if (!(*rdata) && dev->parent->of_node)
-		num_regulators = max77693_pmic_dt_parse_rdata(dev, rdata);
-
-	return num_regulators;
-}
-
 static int max77693_pmic_probe(struct platform_device *pdev)
 {
 	struct max77693_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	struct max77693_regulator_data *rdata = NULL;
-	int num_rdata, i;
+	int i;
 	struct regulator_config config = { };
 
-	num_rdata = max77693_pmic_init_rdata(&pdev->dev, &rdata);
-	if (!rdata || num_rdata <= 0) {
-		dev_err(&pdev->dev, "No init data supplied.\n");
-		return -ENODEV;
-	}
-
-	config.dev = &pdev->dev;
+	config.dev = iodev->dev;
 	config.regmap = iodev->regmap;
 
-	for (i = 0; i < num_rdata; i++) {
-		int id = rdata[i].id;
+	for (i = 0; i < ARRAY_SIZE(regulators); i++) {
 		struct regulator_dev *rdev;
 
-		config.init_data = rdata[i].initdata;
-		config.of_node = rdata[i].of_node;
-
 		rdev = devm_regulator_register(&pdev->dev,
-						&regulators[id], &config);
+						&regulators[i], &config);
 		if (IS_ERR(rdev)) {
 			dev_err(&pdev->dev,
-				"Failed to initialize regulator-%d\n", id);
+				"Failed to initialize regulator-%d\n", i);
 			return PTR_ERR(rdev);
 		}
 	}
diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c
index 7eee2ca..4071d74 100644
--- a/drivers/regulator/max8660.c
+++ b/drivers/regulator/max8660.c
@@ -382,7 +382,7 @@
 				   const struct i2c_device_id *i2c_id)
 {
 	struct device *dev = &client->dev;
-	struct max8660_platform_data *pdata = dev_get_platdata(dev);
+	struct max8660_platform_data pdata_of, *pdata = dev_get_platdata(dev);
 	struct regulator_config config = { };
 	struct max8660 *max8660;
 	int boot_on, i, id, ret = -EINVAL;
@@ -391,7 +391,6 @@
 
 	if (dev->of_node && !pdata) {
 		const struct of_device_id *id;
-		struct max8660_platform_data pdata_of;
 
 		id = of_match_device(of_match_ptr(max8660_dt_ids), dev);
 		if (!id)
@@ -443,9 +442,9 @@
 	for (i = 0; i < pdata->num_subdevs; i++) {
 
 		if (!pdata->subdevs[i].platform_data)
-			return ret;
-
-		boot_on = pdata->subdevs[i].platform_data->constraints.boot_on;
+			boot_on = false;
+		else
+			boot_on = pdata->subdevs[i].platform_data->constraints.boot_on;
 
 		switch (pdata->subdevs[i].id) {
 		case MAX8660_V3:
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 1819831..8217613 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -916,6 +916,9 @@
 			    (id == PALMAS_REG_LDO6))
 				desc->enable_time = 2000;
 		} else {
+			if (!ddata->has_regen3 && id == PALMAS_REG_REGEN3)
+				continue;
+
 			desc->n_voltages = 1;
 			if (reg_init && reg_init->roof_floor)
 				desc->ops = &palmas_ops_ext_control_extreg;
@@ -1398,6 +1401,7 @@
 	.ldo_begin = PALMAS_REG_LDO1,
 	.ldo_end = PALMAS_REG_LDOUSB,
 	.max_reg = PALMAS_NUM_REGS,
+	.has_regen3 = true,
 	.palmas_regs_info = palmas_generic_regs_info,
 	.palmas_matches = palmas_matches,
 	.sleep_req_info = palma_sleep_req_info,
@@ -1411,6 +1415,7 @@
 	.ldo_begin = TPS65917_REG_LDO1,
 	.ldo_end = TPS65917_REG_LDO5,
 	.max_reg = TPS65917_NUM_REGS,
+	.has_regen3 = true,
 	.palmas_regs_info = tps65917_regs_info,
 	.palmas_matches = tps65917_matches,
 	.sleep_req_info = tps65917_sleep_req_info,
@@ -1505,7 +1510,7 @@
 	pdata->ldo6_vibrator = of_property_read_bool(node, "ti,ldo6-vibrator");
 }
 
-static struct of_device_id of_palmas_match_tbl[] = {
+static const struct of_device_id of_palmas_match_tbl[] = {
 	{
 		.compatible = "ti,palmas-pmic",
 		.data = &palmas_ddata,
@@ -1572,9 +1577,11 @@
 	if (!pmic)
 		return -ENOMEM;
 
-	if (of_device_is_compatible(node, "ti,tps659038-pmic"))
+	if (of_device_is_compatible(node, "ti,tps659038-pmic")) {
 		palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
 							TPS659038_REGEN2_CTRL;
+		palmas_ddata.has_regen3 = false;
+	}
 
 	pmic->dev = &pdev->dev;
 	pmic->palmas = palmas;
diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c
index 00c5cc3..e254272 100644
--- a/drivers/regulator/qcom_rpm-regulator.c
+++ b/drivers/regulator/qcom_rpm-regulator.c
@@ -393,6 +393,28 @@
 	return vreg->is_enabled;
 }
 
+static int rpm_reg_set_load(struct regulator_dev *rdev, int load_uA)
+{
+	struct qcom_rpm_reg *vreg = rdev_get_drvdata(rdev);
+	const struct rpm_reg_parts *parts = vreg->parts;
+	const struct request_member *req = &parts->ia;
+	int load_mA = load_uA / 1000;
+	int max_mA = req->mask >> req->shift;
+	int ret;
+
+	if (req->mask == 0)
+		return -EINVAL;
+
+	if (load_mA > max_mA)
+		load_mA = max_mA;
+
+	mutex_lock(&vreg->lock);
+	ret = rpm_reg_write(vreg, req, load_mA);
+	mutex_unlock(&vreg->lock);
+
+	return ret;
+}
+
 static struct regulator_ops uV_ops = {
 	.list_voltage = regulator_list_voltage_linear_range,
 
@@ -402,6 +424,8 @@
 	.enable = rpm_reg_uV_enable,
 	.disable = rpm_reg_uV_disable,
 	.is_enabled = rpm_reg_is_enabled,
+
+	.set_load = rpm_reg_set_load,
 };
 
 static struct regulator_ops mV_ops = {
@@ -413,6 +437,8 @@
 	.enable = rpm_reg_mV_enable,
 	.disable = rpm_reg_mV_disable,
 	.is_enabled = rpm_reg_is_enabled,
+
+	.set_load = rpm_reg_set_load,
 };
 
 static struct regulator_ops switch_ops = {
@@ -581,31 +607,6 @@
 	.supports_force_mode_bypass = false,
 };
 
-static const struct of_device_id rpm_of_match[] = {
-	{ .compatible = "qcom,rpm-pm8058-pldo",     .data = &pm8058_pldo },
-	{ .compatible = "qcom,rpm-pm8058-nldo",     .data = &pm8058_nldo },
-	{ .compatible = "qcom,rpm-pm8058-smps",     .data = &pm8058_smps },
-	{ .compatible = "qcom,rpm-pm8058-ncp",      .data = &pm8058_ncp },
-	{ .compatible = "qcom,rpm-pm8058-switch",   .data = &pm8058_switch },
-
-	{ .compatible = "qcom,rpm-pm8901-pldo",     .data = &pm8901_pldo },
-	{ .compatible = "qcom,rpm-pm8901-nldo",     .data = &pm8901_nldo },
-	{ .compatible = "qcom,rpm-pm8901-ftsmps",   .data = &pm8901_ftsmps },
-	{ .compatible = "qcom,rpm-pm8901-switch",   .data = &pm8901_switch },
-
-	{ .compatible = "qcom,rpm-pm8921-pldo",     .data = &pm8921_pldo },
-	{ .compatible = "qcom,rpm-pm8921-nldo",     .data = &pm8921_nldo },
-	{ .compatible = "qcom,rpm-pm8921-nldo1200", .data = &pm8921_nldo1200 },
-	{ .compatible = "qcom,rpm-pm8921-smps",     .data = &pm8921_smps },
-	{ .compatible = "qcom,rpm-pm8921-ftsmps",   .data = &pm8921_ftsmps },
-	{ .compatible = "qcom,rpm-pm8921-ncp",      .data = &pm8921_ncp },
-	{ .compatible = "qcom,rpm-pm8921-switch",   .data = &pm8921_switch },
-
-	{ .compatible = "qcom,rpm-smb208", .data = &smb208_smps },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, rpm_of_match);
-
 static int rpm_reg_set(struct qcom_rpm_reg *vreg,
 		       const struct request_member *req,
 		       const int value)
@@ -619,7 +620,9 @@
 	return 0;
 }
 
-static int rpm_reg_of_parse_freq(struct device *dev, struct qcom_rpm_reg *vreg)
+static int rpm_reg_of_parse_freq(struct device *dev,
+				 struct device_node *node,
+				 struct qcom_rpm_reg *vreg)
 {
 	static const int freq_table[] = {
 		19200000, 9600000, 6400000, 4800000, 3840000, 3200000, 2740000,
@@ -633,7 +636,7 @@
 	int i;
 
 	key = "qcom,switch-mode-frequency";
-	ret = of_property_read_u32(dev->of_node, key, &freq);
+	ret = of_property_read_u32(node, key, &freq);
 	if (ret) {
 		dev_err(dev, "regulator requires %s property\n", key);
 		return -EINVAL;
@@ -650,84 +653,40 @@
 	return -EINVAL;
 }
 
-static int rpm_reg_probe(struct platform_device *pdev)
+static int rpm_reg_of_parse(struct device_node *node,
+			    const struct regulator_desc *desc,
+			    struct regulator_config *config)
 {
-	struct regulator_init_data *initdata;
-	const struct qcom_rpm_reg *template;
-	const struct of_device_id *match;
-	struct regulator_config config = { };
-	struct regulator_dev *rdev;
-	struct qcom_rpm_reg *vreg;
+	struct qcom_rpm_reg *vreg = config->driver_data;
+	struct device *dev = config->dev;
 	const char *key;
 	u32 force_mode;
 	bool pwm;
 	u32 val;
 	int ret;
 
-	match = of_match_device(rpm_of_match, &pdev->dev);
-	template = match->data;
-
-	vreg = devm_kmalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
-	if (!vreg) {
-		dev_err(&pdev->dev, "failed to allocate vreg\n");
-		return -ENOMEM;
-	}
-	memcpy(vreg, template, sizeof(*vreg));
-	mutex_init(&vreg->lock);
-	vreg->dev = &pdev->dev;
-	vreg->desc.id = -1;
-	vreg->desc.owner = THIS_MODULE;
-	vreg->desc.type = REGULATOR_VOLTAGE;
-	vreg->desc.name = pdev->dev.of_node->name;
-	vreg->desc.supply_name = "vin";
-
-	vreg->rpm = dev_get_drvdata(pdev->dev.parent);
-	if (!vreg->rpm) {
-		dev_err(&pdev->dev, "unable to retrieve handle to rpm\n");
-		return -ENODEV;
-	}
-
-	initdata = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node,
-					      &vreg->desc);
-	if (!initdata)
-		return -EINVAL;
-
-	key = "reg";
-	ret = of_property_read_u32(pdev->dev.of_node, key, &val);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to read %s\n", key);
-		return ret;
-	}
-	vreg->resource = val;
-
-	if ((vreg->parts->uV.mask || vreg->parts->mV.mask) &&
-	    (!initdata->constraints.min_uV || !initdata->constraints.max_uV)) {
-		dev_err(&pdev->dev, "no voltage specified for regulator\n");
-		return -EINVAL;
-	}
-
 	key = "bias-pull-down";
-	if (of_property_read_bool(pdev->dev.of_node, key)) {
+	if (of_property_read_bool(node, key)) {
 		ret = rpm_reg_set(vreg, &vreg->parts->pd, 1);
 		if (ret) {
-			dev_err(&pdev->dev, "%s is invalid", key);
+			dev_err(dev, "%s is invalid", key);
 			return ret;
 		}
 	}
 
 	if (vreg->parts->freq.mask) {
-		ret = rpm_reg_of_parse_freq(&pdev->dev, vreg);
+		ret = rpm_reg_of_parse_freq(dev, node, vreg);
 		if (ret < 0)
 			return ret;
 	}
 
 	if (vreg->parts->pm.mask) {
 		key = "qcom,power-mode-hysteretic";
-		pwm = !of_property_read_bool(pdev->dev.of_node, key);
+		pwm = !of_property_read_bool(node, key);
 
 		ret = rpm_reg_set(vreg, &vreg->parts->pm, pwm);
 		if (ret) {
-			dev_err(&pdev->dev, "failed to set power mode\n");
+			dev_err(dev, "failed to set power mode\n");
 			return ret;
 		}
 	}
@@ -736,11 +695,11 @@
 		force_mode = -1;
 
 		key = "qcom,force-mode";
-		ret = of_property_read_u32(pdev->dev.of_node, key, &val);
+		ret = of_property_read_u32(node, key, &val);
 		if (ret == -EINVAL) {
 			val = QCOM_RPM_FORCE_MODE_NONE;
 		} else if (ret < 0) {
-			dev_err(&pdev->dev, "failed to read %s\n", key);
+			dev_err(dev, "failed to read %s\n", key);
 			return ret;
 		}
 
@@ -775,25 +734,192 @@
 		}
 
 		if (force_mode == -1) {
-			dev_err(&pdev->dev, "invalid force mode\n");
+			dev_err(dev, "invalid force mode\n");
 			return -EINVAL;
 		}
 
 		ret = rpm_reg_set(vreg, &vreg->parts->fm, force_mode);
 		if (ret) {
-			dev_err(&pdev->dev, "failed to set force mode\n");
+			dev_err(dev, "failed to set force mode\n");
 			return ret;
 		}
 	}
 
-	config.dev = &pdev->dev;
-	config.init_data = initdata;
-	config.driver_data = vreg;
-	config.of_node = pdev->dev.of_node;
-	rdev = devm_regulator_register(&pdev->dev, &vreg->desc, &config);
-	if (IS_ERR(rdev)) {
-		dev_err(&pdev->dev, "can't register regulator\n");
-		return PTR_ERR(rdev);
+	return 0;
+}
+
+struct rpm_regulator_data {
+	const char *name;
+	int resource;
+	const struct qcom_rpm_reg *template;
+	const char *supply;
+};
+
+static const struct rpm_regulator_data rpm_pm8058_regulators[] = {
+	{ "l0",   QCOM_RPM_PM8058_LDO0,   &pm8058_nldo, "vdd_l0_l1_lvs"	},
+	{ "l1",   QCOM_RPM_PM8058_LDO1,   &pm8058_nldo, "vdd_l0_l1_lvs" },
+	{ "l2",   QCOM_RPM_PM8058_LDO2,   &pm8058_pldo, "vdd_l2_l11_l12" },
+	{ "l3",   QCOM_RPM_PM8058_LDO3,   &pm8058_pldo, "vdd_l3_l4_l5" },
+	{ "l4",   QCOM_RPM_PM8058_LDO4,   &pm8058_pldo, "vdd_l3_l4_l5" },
+	{ "l5",   QCOM_RPM_PM8058_LDO5,   &pm8058_pldo, "vdd_l3_l4_l5" },
+	{ "l6",   QCOM_RPM_PM8058_LDO6,   &pm8058_pldo, "vdd_l6_l7" },
+	{ "l7",   QCOM_RPM_PM8058_LDO7,   &pm8058_pldo, "vdd_l6_l7" },
+	{ "l8",   QCOM_RPM_PM8058_LDO8,   &pm8058_pldo, "vdd_l8" },
+	{ "l9",   QCOM_RPM_PM8058_LDO9,   &pm8058_pldo, "vdd_l9" },
+	{ "l10",  QCOM_RPM_PM8058_LDO10,  &pm8058_pldo, "vdd_l10" },
+	{ "l11",  QCOM_RPM_PM8058_LDO11,  &pm8058_pldo, "vdd_l2_l11_l12" },
+	{ "l12",  QCOM_RPM_PM8058_LDO12,  &pm8058_pldo, "vdd_l2_l11_l12" },
+	{ "l13",  QCOM_RPM_PM8058_LDO13,  &pm8058_pldo, "vdd_l13_l16" },
+	{ "l14",  QCOM_RPM_PM8058_LDO14,  &pm8058_pldo, "vdd_l14_l15" },
+	{ "l15",  QCOM_RPM_PM8058_LDO15,  &pm8058_pldo, "vdd_l14_l15" },
+	{ "l16",  QCOM_RPM_PM8058_LDO16,  &pm8058_pldo, "vdd_l13_l16" },
+	{ "l17",  QCOM_RPM_PM8058_LDO17,  &pm8058_pldo, "vdd_l17_l18" },
+	{ "l18",  QCOM_RPM_PM8058_LDO18,  &pm8058_pldo, "vdd_l17_l18" },
+	{ "l19",  QCOM_RPM_PM8058_LDO19,  &pm8058_pldo, "vdd_l19_l20" },
+	{ "l20",  QCOM_RPM_PM8058_LDO20,  &pm8058_pldo, "vdd_l19_l20" },
+	{ "l21",  QCOM_RPM_PM8058_LDO21,  &pm8058_nldo, "vdd_l21" },
+	{ "l22",  QCOM_RPM_PM8058_LDO22,  &pm8058_nldo, "vdd_l22" },
+	{ "l23",  QCOM_RPM_PM8058_LDO23,  &pm8058_nldo, "vdd_l23_l24_l25" },
+	{ "l24",  QCOM_RPM_PM8058_LDO24,  &pm8058_nldo, "vdd_l23_l24_l25" },
+	{ "l25",  QCOM_RPM_PM8058_LDO25,  &pm8058_nldo, "vdd_l23_l24_l25" },
+
+	{ "s0",   QCOM_RPM_PM8058_SMPS0,  &pm8058_smps, "vdd_s0" },
+	{ "s1",   QCOM_RPM_PM8058_SMPS1,  &pm8058_smps, "vdd_s1" },
+	{ "s2",   QCOM_RPM_PM8058_SMPS2,  &pm8058_smps, "vdd_s2" },
+	{ "s3",   QCOM_RPM_PM8058_SMPS3,  &pm8058_smps, "vdd_s3" },
+	{ "s4",   QCOM_RPM_PM8058_SMPS4,  &pm8058_smps, "vdd_s4" },
+
+	{ "lvs0", QCOM_RPM_PM8058_LVS0, &pm8058_switch, "vdd_l0_l1_lvs" },
+	{ "lvs1", QCOM_RPM_PM8058_LVS1, &pm8058_switch, "vdd_l0_l1_lvs" },
+
+	{ "ncp",  QCOM_RPM_PM8058_NCP, &pm8058_ncp, "vdd_ncp" },
+	{ }
+};
+
+static const struct rpm_regulator_data rpm_pm8901_regulators[] = {
+	{ "l0",   QCOM_RPM_PM8901_LDO0, &pm8901_nldo, "vdd_l0" },
+	{ "l1",   QCOM_RPM_PM8901_LDO1, &pm8901_pldo, "vdd_l1" },
+	{ "l2",   QCOM_RPM_PM8901_LDO2, &pm8901_pldo, "vdd_l2" },
+	{ "l3",   QCOM_RPM_PM8901_LDO3, &pm8901_pldo, "vdd_l3" },
+	{ "l4",   QCOM_RPM_PM8901_LDO4, &pm8901_pldo, "vdd_l4" },
+	{ "l5",   QCOM_RPM_PM8901_LDO5, &pm8901_pldo, "vdd_l5" },
+	{ "l6",   QCOM_RPM_PM8901_LDO6, &pm8901_pldo, "vdd_l6" },
+
+	{ "s0",   QCOM_RPM_PM8901_SMPS0, &pm8901_ftsmps, "vdd_s0" },
+	{ "s1",   QCOM_RPM_PM8901_SMPS1, &pm8901_ftsmps, "vdd_s1" },
+	{ "s2",   QCOM_RPM_PM8901_SMPS2, &pm8901_ftsmps, "vdd_s2" },
+	{ "s3",   QCOM_RPM_PM8901_SMPS3, &pm8901_ftsmps, "vdd_s3" },
+	{ "s4",   QCOM_RPM_PM8901_SMPS4, &pm8901_ftsmps, "vdd_s4" },
+
+	{ "lvs0", QCOM_RPM_PM8901_LVS0, &pm8901_switch, "lvs0_in" },
+	{ "lvs1", QCOM_RPM_PM8901_LVS1, &pm8901_switch, "lvs1_in" },
+	{ "lvs2", QCOM_RPM_PM8901_LVS2, &pm8901_switch, "lvs2_in" },
+	{ "lvs3", QCOM_RPM_PM8901_LVS3, &pm8901_switch, "lvs3_in" },
+
+	{ "mvs", QCOM_RPM_PM8901_MVS, &pm8901_switch, "mvs_in" },
+	{ }
+};
+
+static const struct rpm_regulator_data rpm_pm8921_regulators[] = {
+	{ "s1",  QCOM_RPM_PM8921_SMPS1, &pm8921_smps, "vdd_s1" },
+	{ "s2",  QCOM_RPM_PM8921_SMPS2, &pm8921_smps, "vdd_s2" },
+	{ "s3",  QCOM_RPM_PM8921_SMPS3, &pm8921_smps },
+	{ "s4",  QCOM_RPM_PM8921_SMPS4, &pm8921_smps, "vdd_s4" },
+	{ "s7",  QCOM_RPM_PM8921_SMPS7, &pm8921_smps, "vdd_s7" },
+	{ "s8",  QCOM_RPM_PM8921_SMPS8, &pm8921_smps, "vdd_s8"  },
+
+	{ "l1",  QCOM_RPM_PM8921_LDO1, &pm8921_nldo, "vdd_l1_l2_l12_l18" },
+	{ "l2",  QCOM_RPM_PM8921_LDO2, &pm8921_nldo, "vdd_l1_l2_l12_l18" },
+	{ "l3",  QCOM_RPM_PM8921_LDO3, &pm8921_pldo, "vdd_l3_l15_l17" },
+	{ "l4",  QCOM_RPM_PM8921_LDO4, &pm8921_pldo, "vdd_l4_l14" },
+	{ "l5",  QCOM_RPM_PM8921_LDO5, &pm8921_pldo, "vdd_l5_l8_l16" },
+	{ "l6",  QCOM_RPM_PM8921_LDO6, &pm8921_pldo, "vdd_l6_l7" },
+	{ "l7",  QCOM_RPM_PM8921_LDO7, &pm8921_pldo, "vdd_l6_l7" },
+	{ "l8",  QCOM_RPM_PM8921_LDO8, &pm8921_pldo, "vdd_l5_l8_l16" },
+	{ "l9",  QCOM_RPM_PM8921_LDO9, &pm8921_pldo, "vdd_l9_l11" },
+	{ "l10", QCOM_RPM_PM8921_LDO10, &pm8921_pldo, "vdd_l10_l22" },
+	{ "l11", QCOM_RPM_PM8921_LDO11, &pm8921_pldo, "vdd_l9_l11" },
+	{ "l12", QCOM_RPM_PM8921_LDO12, &pm8921_nldo, "vdd_l1_l2_l12_l18" },
+	{ "l14", QCOM_RPM_PM8921_LDO14, &pm8921_pldo, "vdd_l4_l14" },
+	{ "l15", QCOM_RPM_PM8921_LDO15, &pm8921_pldo, "vdd_l3_l15_l17" },
+	{ "l16", QCOM_RPM_PM8921_LDO16, &pm8921_pldo, "vdd_l5_l8_l16" },
+	{ "l17", QCOM_RPM_PM8921_LDO17, &pm8921_pldo, "vdd_l3_l15_l17" },
+	{ "l18", QCOM_RPM_PM8921_LDO18, &pm8921_nldo, "vdd_l1_l2_l12_l18" },
+	{ "l21", QCOM_RPM_PM8921_LDO21, &pm8921_pldo, "vdd_l21_l23_l29" },
+	{ "l22", QCOM_RPM_PM8921_LDO22, &pm8921_pldo, "vdd_l10_l22" },
+	{ "l23", QCOM_RPM_PM8921_LDO23, &pm8921_pldo, "vdd_l21_l23_l29" },
+	{ "l24", QCOM_RPM_PM8921_LDO24, &pm8921_nldo1200, "vdd_l24" },
+	{ "l25", QCOM_RPM_PM8921_LDO25, &pm8921_nldo1200, "vdd_l25" },
+	{ "l26", QCOM_RPM_PM8921_LDO26, &pm8921_nldo1200, "vdd_l26" },
+	{ "l27", QCOM_RPM_PM8921_LDO27, &pm8921_nldo1200, "vdd_l27" },
+	{ "l28", QCOM_RPM_PM8921_LDO28, &pm8921_nldo1200, "vdd_l28" },
+	{ "l29", QCOM_RPM_PM8921_LDO29, &pm8921_pldo, "vdd_l21_l23_l29" },
+
+	{ "lvs1", QCOM_RPM_PM8921_LVS1, &pm8921_switch, "vin_lvs1_3_6" },
+	{ "lvs2", QCOM_RPM_PM8921_LVS2, &pm8921_switch, "vin_lvs2" },
+	{ "lvs3", QCOM_RPM_PM8921_LVS3, &pm8921_switch, "vin_lvs1_3_6" },
+	{ "lvs4", QCOM_RPM_PM8921_LVS4, &pm8921_switch, "vin_lvs4_5_7" },
+	{ "lvs5", QCOM_RPM_PM8921_LVS5, &pm8921_switch, "vin_lvs4_5_7" },
+	{ "lvs6", QCOM_RPM_PM8921_LVS6, &pm8921_switch, "vin_lvs1_3_6" },
+	{ "lvs7", QCOM_RPM_PM8921_LVS7, &pm8921_switch, "vin_lvs4_5_7" },
+
+	{ "usb-switch", QCOM_RPM_USB_OTG_SWITCH, &pm8921_switch, "vin_5vs" },
+	{ "hdmi-switch", QCOM_RPM_HDMI_SWITCH, &pm8921_switch, "vin_5vs" },
+	{ "ncp", QCOM_RPM_PM8921_NCP, &pm8921_ncp, "vdd_ncp" },
+	{ }
+};
+
+static const struct of_device_id rpm_of_match[] = {
+	{ .compatible = "qcom,rpm-pm8058-regulators", .data = &rpm_pm8058_regulators },
+	{ .compatible = "qcom,rpm-pm8901-regulators", .data = &rpm_pm8901_regulators },
+	{ .compatible = "qcom,rpm-pm8921-regulators", .data = &rpm_pm8921_regulators },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rpm_of_match);
+
+static int rpm_reg_probe(struct platform_device *pdev)
+{
+	const struct rpm_regulator_data *reg;
+	const struct of_device_id *match;
+	struct regulator_config config = { };
+	struct regulator_dev *rdev;
+	struct qcom_rpm_reg *vreg;
+	struct qcom_rpm *rpm;
+
+	rpm = dev_get_drvdata(pdev->dev.parent);
+	if (!rpm) {
+		dev_err(&pdev->dev, "unable to retrieve handle to rpm\n");
+		return -ENODEV;
+	}
+
+	match = of_match_device(rpm_of_match, &pdev->dev);
+	for (reg = match->data; reg->name; reg++) {
+		vreg = devm_kmalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
+		if (!vreg)
+			return -ENOMEM;
+
+		memcpy(vreg, reg->template, sizeof(*vreg));
+		mutex_init(&vreg->lock);
+
+		vreg->dev = &pdev->dev;
+		vreg->resource = reg->resource;
+		vreg->rpm = rpm;
+
+		vreg->desc.id = -1;
+		vreg->desc.owner = THIS_MODULE;
+		vreg->desc.type = REGULATOR_VOLTAGE;
+		vreg->desc.name = reg->name;
+		vreg->desc.supply_name = reg->supply;
+		vreg->desc.of_match = reg->name;
+		vreg->desc.of_parse_cb = rpm_reg_of_parse;
+
+		config.dev = &pdev->dev;
+		config.driver_data = vreg;
+		rdev = devm_regulator_register(&pdev->dev, &vreg->desc, &config);
+		if (IS_ERR(rdev)) {
+			dev_err(&pdev->dev, "failed to register %s\n", reg->name);
+			return PTR_ERR(rdev);
+		}
 	}
 
 	return 0;
diff --git a/drivers/regulator/stw481x-vmmc.c b/drivers/regulator/stw481x-vmmc.c
index 89025f5..7d2ae3e 100644
--- a/drivers/regulator/stw481x-vmmc.c
+++ b/drivers/regulator/stw481x-vmmc.c
@@ -56,6 +56,7 @@
 {
 	struct stw481x *stw481x = dev_get_platdata(&pdev->dev);
 	struct regulator_config config = { };
+	struct regulator_dev *rdev;
 	int ret;
 
 	/* First disable the external VMMC if it's active */
@@ -75,12 +76,11 @@
 						      pdev->dev.of_node,
 						      &vmmc_regulator);
 
-	stw481x->vmmc_regulator = devm_regulator_register(&pdev->dev,
-						&vmmc_regulator, &config);
-	if (IS_ERR(stw481x->vmmc_regulator)) {
+	rdev = devm_regulator_register(&pdev->dev, &vmmc_regulator, &config);
+	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev,
 			"error initializing STw481x VMMC regulator\n");
-		return PTR_ERR(stw481x->vmmc_regulator);
+		return PTR_ERR(rdev);
 	}
 
 	dev_info(&pdev->dev, "initialized STw481x VMMC regulator\n");
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 7ec7c39..95f6b04 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1151,17 +1151,16 @@
 static irqreturn_t pmic_uv_handler(int irq, void *data)
 {
 	struct regulator_dev *rdev = (struct regulator_dev *)data;
-	struct wm8350 *wm8350 = rdev_get_drvdata(rdev);
 
 	mutex_lock(&rdev->mutex);
 	if (irq == WM8350_IRQ_CS1 || irq == WM8350_IRQ_CS2)
 		regulator_notifier_call_chain(rdev,
 					      REGULATOR_EVENT_REGULATION_OUT,
-					      wm8350);
+					      NULL);
 	else
 		regulator_notifier_call_chain(rdev,
 					      REGULATOR_EVENT_UNDER_VOLTAGE,
-					      wm8350);
+					      NULL);
 	mutex_unlock(&rdev->mutex);
 
 	return IRQ_HANDLED;
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 472a5ad..c29ba7e 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -55,7 +55,7 @@
 	struct timespec64	delta, delta_delta;
 	int err;
 
-	if (has_persistent_clock())
+	if (timekeeping_rtc_skipsuspend())
 		return 0;
 
 	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
@@ -102,7 +102,7 @@
 	struct timespec64	sleep_time;
 	int err;
 
-	if (has_persistent_clock())
+	if (timekeeping_rtc_skipresume())
 		return 0;
 
 	rtc_hctosys_ret = -ENODEV;
@@ -117,10 +117,6 @@
 		return 0;
 	}
 
-	if (rtc_valid_tm(&tm) != 0) {
-		pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
-		return 0;
-	}
 	new_rtc.tv_sec = rtc_tm_to_time64(&tm);
 	new_rtc.tv_nsec = 0;
 
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 37215cf..d43ee40 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -72,7 +72,11 @@
 		err = -ENODEV;
 	else if (rtc->ops->set_time)
 		err = rtc->ops->set_time(rtc->dev.parent, tm);
-	else if (rtc->ops->set_mmss) {
+	else if (rtc->ops->set_mmss64) {
+		time64_t secs64 = rtc_tm_to_time64(tm);
+
+		err = rtc->ops->set_mmss64(rtc->dev.parent, secs64);
+	} else if (rtc->ops->set_mmss) {
 		time64_t secs64 = rtc_tm_to_time64(tm);
 		err = rtc->ops->set_mmss(rtc->dev.parent, secs64);
 	} else
@@ -96,6 +100,8 @@
 
 	if (!rtc->ops)
 		err = -ENODEV;
+	else if (rtc->ops->set_mmss64)
+		err = rtc->ops->set_mmss64(rtc->dev.parent, secs);
 	else if (rtc->ops->set_mmss)
 		err = rtc->ops->set_mmss(rtc->dev.parent, secs);
 	else if (rtc->ops->read_time && rtc->ops->set_time) {
diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c
index 1d0340f..9b725c5 100644
--- a/drivers/rtc/rtc-ab3100.c
+++ b/drivers/rtc/rtc-ab3100.c
@@ -43,21 +43,21 @@
 /*
  * RTC clock functions and device struct declaration
  */
-static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int ab3100_rtc_set_mmss(struct device *dev, time64_t secs)
 {
 	u8 regs[] = {AB3100_TI0, AB3100_TI1, AB3100_TI2,
 		     AB3100_TI3, AB3100_TI4, AB3100_TI5};
 	unsigned char buf[6];
-	u64 fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
+	u64 hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
 	int err = 0;
 	int i;
 
-	buf[0] = (fat_time) & 0xFF;
-	buf[1] = (fat_time >> 8) & 0xFF;
-	buf[2] = (fat_time >> 16) & 0xFF;
-	buf[3] = (fat_time >> 24) & 0xFF;
-	buf[4] = (fat_time >> 32) & 0xFF;
-	buf[5] = (fat_time >> 40) & 0xFF;
+	buf[0] = (hw_counter) & 0xFF;
+	buf[1] = (hw_counter >> 8) & 0xFF;
+	buf[2] = (hw_counter >> 16) & 0xFF;
+	buf[3] = (hw_counter >> 24) & 0xFF;
+	buf[4] = (hw_counter >> 32) & 0xFF;
+	buf[5] = (hw_counter >> 40) & 0xFF;
 
 	for (i = 0; i < 6; i++) {
 		err = abx500_set_register_interruptible(dev, 0,
@@ -75,7 +75,7 @@
 
 static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	unsigned long time;
+	time64_t time;
 	u8 rtcval;
 	int err;
 
@@ -88,7 +88,7 @@
 		dev_info(dev, "clock not set (lost power)");
 		return -EINVAL;
 	} else {
-		u64 fat_time;
+		u64 hw_counter;
 		u8 buf[6];
 
 		/* Read out time registers */
@@ -98,22 +98,21 @@
 		if (err != 0)
 			return err;
 
-		fat_time = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
+		hw_counter = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
 			((u64) buf[3] << 24) | ((u64) buf[2] << 16) |
 			((u64) buf[1] << 8) | (u64) buf[0];
-		time = (unsigned long) (fat_time /
-					(u64) (AB3100_RTC_CLOCK_RATE * 2));
+		time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
 	}
 
-	rtc_time_to_tm(time, tm);
+	rtc_time64_to_tm(time, tm);
 
 	return rtc_valid_tm(tm);
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
-	unsigned long time;
-	u64 fat_time;
+	time64_t time;
+	u64 hw_counter;
 	u8 buf[6];
 	u8 rtcval;
 	int err;
@@ -134,11 +133,11 @@
 						     AB3100_AL0, buf, 4);
 	if (err)
 		return err;
-	fat_time = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
+	hw_counter = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
 		((u64) buf[1] << 24) | ((u64) buf[0] << 16);
-	time = (unsigned long) (fat_time / (u64) (AB3100_RTC_CLOCK_RATE * 2));
+	time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
 
-	rtc_time_to_tm(time, &alarm->time);
+	rtc_time64_to_tm(time, &alarm->time);
 
 	return rtc_valid_tm(&alarm->time);
 }
@@ -147,17 +146,17 @@
 {
 	u8 regs[] = {AB3100_AL0, AB3100_AL1, AB3100_AL2, AB3100_AL3};
 	unsigned char buf[4];
-	unsigned long secs;
-	u64 fat_time;
+	time64_t secs;
+	u64 hw_counter;
 	int err;
 	int i;
 
-	rtc_tm_to_time(&alarm->time, &secs);
-	fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
-	buf[0] = (fat_time >> 16) & 0xFF;
-	buf[1] = (fat_time >> 24) & 0xFF;
-	buf[2] = (fat_time >> 32) & 0xFF;
-	buf[3] = (fat_time >> 40) & 0xFF;
+	secs = rtc_tm_to_time64(&alarm->time);
+	hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
+	buf[0] = (hw_counter >> 16) & 0xFF;
+	buf[1] = (hw_counter >> 24) & 0xFF;
+	buf[2] = (hw_counter >> 32) & 0xFF;
+	buf[3] = (hw_counter >> 40) & 0xFF;
 
 	/* Set the alarm */
 	for (i = 0; i < 4; i++) {
@@ -193,7 +192,7 @@
 
 static const struct rtc_class_ops ab3100_rtc_ops = {
 	.read_time	= ab3100_rtc_read_time,
-	.set_mmss	= ab3100_rtc_set_mmss,
+	.set_mmss64	= ab3100_rtc_set_mmss,
 	.read_alarm	= ab3100_rtc_read_alarm,
 	.set_alarm	= ab3100_rtc_set_alarm,
 	.alarm_irq_enable = ab3100_rtc_irq_enable,
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 5bce904..32df1d8 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -83,20 +83,19 @@
 			return ret;
 	} while (days1 != days2);
 
-	rtc_time_to_tm(days1 * SEC_PER_DAY + seconds, tm);
+	rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
 	return rtc_valid_tm(tm);
 }
 
-static int mc13xxx_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
 {
 	struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
 	unsigned int seconds, days;
 	unsigned int alarmseconds;
 	int ret;
 
-	seconds = secs % SEC_PER_DAY;
-	days = secs / SEC_PER_DAY;
+	days = div_s64_rem(secs, SEC_PER_DAY, &seconds);
 
 	mc13xxx_lock(priv->mc13xxx);
 
@@ -159,7 +158,7 @@
 {
 	struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
 	unsigned seconds, days;
-	unsigned long s1970;
+	time64_t s1970;
 	int enabled, pending;
 	int ret;
 
@@ -189,10 +188,10 @@
 	alarm->enabled = enabled;
 	alarm->pending = pending;
 
-	s1970 = days * SEC_PER_DAY + seconds;
+	s1970 = (time64_t)days * SEC_PER_DAY + seconds;
 
-	rtc_time_to_tm(s1970, &alarm->time);
-	dev_dbg(dev, "%s: %lu\n", __func__, s1970);
+	rtc_time64_to_tm(s1970, &alarm->time);
+	dev_dbg(dev, "%s: %lld\n", __func__, (long long)s1970);
 
 	return 0;
 }
@@ -200,8 +199,8 @@
 static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
 	struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
-	unsigned long s1970;
-	unsigned seconds, days;
+	time64_t s1970;
+	u32 seconds, days;
 	int ret;
 
 	mc13xxx_lock(priv->mc13xxx);
@@ -215,20 +214,17 @@
 	if (unlikely(ret))
 		goto out;
 
-	ret = rtc_tm_to_time(&alarm->time, &s1970);
-	if (unlikely(ret))
-		goto out;
+	s1970 = rtc_tm_to_time64(&alarm->time);
 
-	dev_dbg(dev, "%s: o%2.s %lu\n", __func__, alarm->enabled ? "n" : "ff",
-			s1970);
+	dev_dbg(dev, "%s: o%2.s %lld\n", __func__, alarm->enabled ? "n" : "ff",
+			(long long)s1970);
 
 	ret = mc13xxx_rtc_irq_enable_unlocked(dev, alarm->enabled,
 			MC13XXX_IRQ_TODA);
 	if (unlikely(ret))
 		goto out;
 
-	seconds = s1970 % SEC_PER_DAY;
-	days = s1970 / SEC_PER_DAY;
+	days = div_s64_rem(s1970, SEC_PER_DAY, &seconds);
 
 	ret = mc13xxx_reg_write(priv->mc13xxx, MC13XXX_RTCDAYA, days);
 	if (unlikely(ret))
@@ -268,7 +264,7 @@
 
 static const struct rtc_class_ops mc13xxx_rtc_ops = {
 	.read_time = mc13xxx_rtc_read_time,
-	.set_mmss = mc13xxx_rtc_set_mmss,
+	.set_mmss64 = mc13xxx_rtc_set_mmss,
 	.read_alarm = mc13xxx_rtc_read_alarm,
 	.set_alarm = mc13xxx_rtc_set_alarm,
 	.alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 3c3f8d1..09d422b 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -106,7 +106,7 @@
  * This function is used to obtain the RTC time or the alarm value in
  * second.
  */
-static u32 get_alarm_or_time(struct device *dev, int time_alarm)
+static time64_t get_alarm_or_time(struct device *dev, int time_alarm)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -129,29 +129,28 @@
 	hr = hr_min >> 8;
 	min = hr_min & 0xff;
 
-	return (((day * 24 + hr) * 60) + min) * 60 + sec;
+	return ((((time64_t)day * 24 + hr) * 60) + min) * 60 + sec;
 }
 
 /*
  * This function sets the RTC alarm value or the time value.
  */
-static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
+static void set_alarm_or_time(struct device *dev, int time_alarm, time64_t time)
 {
-	u32 day, hr, min, sec, temp;
+	u32 tod, day, hr, min, sec, temp;
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
 
-	day = time / 86400;
-	time -= day * 86400;
+	day = div_s64_rem(time, 86400, &tod);
 
 	/* time is within a day now */
-	hr = time / 3600;
-	time -= hr * 3600;
+	hr = tod / 3600;
+	tod -= hr * 3600;
 
 	/* time is within an hour now */
-	min = time / 60;
-	sec = time - min * 60;
+	min = tod / 60;
+	sec = tod - min * 60;
 
 	temp = (hr << 8) + min;
 
@@ -173,29 +172,18 @@
  * This function updates the RTC alarm registers and then clears all the
  * interrupt status bits.
  */
-static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
+static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
 {
-	struct rtc_time alarm_tm, now_tm;
-	unsigned long now, time;
+	time64_t time;
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
 
-	now = get_alarm_or_time(dev, MXC_RTC_TIME);
-	rtc_time_to_tm(now, &now_tm);
-	alarm_tm.tm_year = now_tm.tm_year;
-	alarm_tm.tm_mon = now_tm.tm_mon;
-	alarm_tm.tm_mday = now_tm.tm_mday;
-	alarm_tm.tm_hour = alrm->tm_hour;
-	alarm_tm.tm_min = alrm->tm_min;
-	alarm_tm.tm_sec = alrm->tm_sec;
-	rtc_tm_to_time(&alarm_tm, &time);
+	time = rtc_tm_to_time64(alrm);
 
 	/* clear all the interrupt status bits */
 	writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR);
 	set_alarm_or_time(dev, MXC_RTC_ALARM, time);
-
-	return 0;
 }
 
 static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
@@ -283,14 +271,14 @@
  */
 static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	u32 val;
+	time64_t val;
 
 	/* Avoid roll-over from reading the different registers */
 	do {
 		val = get_alarm_or_time(dev, MXC_RTC_TIME);
 	} while (val != get_alarm_or_time(dev, MXC_RTC_TIME));
 
-	rtc_time_to_tm(val, tm);
+	rtc_time64_to_tm(val, tm);
 
 	return 0;
 }
@@ -298,7 +286,7 @@
 /*
  * This function sets the internal RTC time based on tm in Gregorian date.
  */
-static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
+static int mxc_rtc_set_mmss(struct device *dev, time64_t time)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -309,9 +297,9 @@
 	if (is_imx1_rtc(pdata)) {
 		struct rtc_time tm;
 
-		rtc_time_to_tm(time, &tm);
+		rtc_time64_to_tm(time, &tm);
 		tm.tm_year = 70;
-		rtc_tm_to_time(&tm, &time);
+		time = rtc_tm_to_time64(&tm);
 	}
 
 	/* Avoid roll-over from reading the different registers */
@@ -333,7 +321,7 @@
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
 
-	rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
+	rtc_time64_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
 	alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0;
 
 	return 0;
@@ -346,11 +334,8 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = rtc_update_alarm(dev, &alrm->time);
-	if (ret)
-		return ret;
+	rtc_update_alarm(dev, &alrm->time);
 
 	memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time));
 	mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled);
@@ -362,7 +347,7 @@
 static struct rtc_class_ops mxc_rtc_ops = {
 	.release		= mxc_rtc_release,
 	.read_time		= mxc_rtc_read_time,
-	.set_mmss		= mxc_rtc_set_mmss,
+	.set_mmss64		= mxc_rtc_set_mmss,
 	.read_alarm		= mxc_rtc_read_alarm,
 	.set_alarm		= mxc_rtc_set_alarm,
 	.alarm_irq_enable	= mxc_rtc_alarm_irq_enable,
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index 8f86fa9..3a2da4c 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -13,6 +13,10 @@
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 
+static int test_mmss64;
+module_param(test_mmss64, int, 0644);
+MODULE_PARM_DESC(test_mmss64, "Test struct rtc_class_ops.set_mmss64().");
+
 static struct platform_device *test0 = NULL, *test1 = NULL;
 
 static int test_rtc_read_alarm(struct device *dev,
@@ -30,7 +34,13 @@
 static int test_rtc_read_time(struct device *dev,
 	struct rtc_time *tm)
 {
-	rtc_time_to_tm(get_seconds(), tm);
+	rtc_time64_to_tm(ktime_get_real_seconds(), tm);
+	return 0;
+}
+
+static int test_rtc_set_mmss64(struct device *dev, time64_t secs)
+{
+	dev_info(dev, "%s, secs = %lld\n", __func__, (long long)secs);
 	return 0;
 }
 
@@ -55,7 +65,7 @@
 	return 0;
 }
 
-static const struct rtc_class_ops test_rtc_ops = {
+static struct rtc_class_ops test_rtc_ops = {
 	.proc = test_rtc_proc,
 	.read_time = test_rtc_read_time,
 	.read_alarm = test_rtc_read_alarm,
@@ -101,6 +111,11 @@
 	int err;
 	struct rtc_device *rtc;
 
+	if (test_mmss64) {
+		test_rtc_ops.set_mmss64 = test_rtc_set_mmss64;
+		test_rtc_ops.set_mmss = NULL;
+	}
+
 	rtc = devm_rtc_device_register(&plat_dev->dev, "test",
 				&test_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
index eb71872..7728d5e 100644
--- a/drivers/rtc/systohc.c
+++ b/drivers/rtc/systohc.c
@@ -11,7 +11,7 @@
  * rtc_set_ntp_time - Save NTP synchronized time to the RTC
  * @now: Current time of day
  *
- * Replacement for the NTP platform function update_persistent_clock
+ * Replacement for the NTP platform function update_persistent_clock64
  * that stores time for later retrieval by rtc_hctosys.
  *
  * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
@@ -35,7 +35,10 @@
 	if (rtc) {
 		/* rtc_hctosys exclusively uses UTC, so we call set_time here,
 		 * not set_mmss. */
-		if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
+		if (rtc->ops &&
+		    (rtc->ops->set_time ||
+		     rtc->ops->set_mmss64 ||
+		     rtc->ops->set_mmss))
 			err = rtc_set_time(rtc, &tm);
 		rtc_class_close(rtc);
 	}
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index a7cc618..923a2b5 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -5734,9 +5734,9 @@
 hba_free:
 	if (phba->msix_enabled)
 		pci_disable_msix(phba->pcidev);
-	iscsi_host_remove(phba->shost);
 	pci_dev_put(phba->pcidev);
 	iscsi_host_free(phba->shost);
+	pci_set_drvdata(pcidev, NULL);
 disable_pci:
 	pci_disable_device(pcidev);
 	return ret;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 4cdaffc..c95a4e9 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -26,6 +26,7 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/jiffies.h>
+#include <asm/unaligned.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -2586,3 +2587,33 @@
 	}
 }
 EXPORT_SYMBOL(scsi_build_sense_buffer);
+
+/**
+ * scsi_set_sense_information - set the information field in a
+ *		formatted sense data buffer
+ * @buf:	Where to build sense data
+ * @info:	64-bit information value to be set
+ *
+ **/
+void scsi_set_sense_information(u8 *buf, u64 info)
+{
+	if ((buf[0] & 0x7f) == 0x72) {
+		u8 *ucp, len;
+
+		len = buf[7];
+		ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0);
+		if (!ucp) {
+			buf[7] = len + 0xa;
+			ucp = buf + 8 + len;
+		}
+		ucp[0] = 0;
+		ucp[1] = 0xa;
+		ucp[2] = 0x80; /* Valid bit */
+		ucp[3] = 0;
+		put_unaligned_be64(info, &ucp[4]);
+	} else if ((buf[0] & 0x7f) == 0x70) {
+		buf[0] |= 0x80;
+		put_unaligned_be64(info, &buf[3]);
+	}
+}
+EXPORT_SYMBOL(scsi_set_sense_information);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 54d7a6c..b1a2631 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1311,9 +1311,11 @@
 				    "rejecting I/O to dead device\n");
 			ret = BLKPREP_KILL;
 			break;
-		case SDEV_QUIESCE:
 		case SDEV_BLOCK:
 		case SDEV_CREATED_BLOCK:
+			ret = BLKPREP_DEFER;
+			break;
+		case SDEV_QUIESCE:
 			/*
 			 * If the devices is blocked we defer normal commands.
 			 */
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5d60a86..2aa85e3 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4225,22 +4225,15 @@
 static int ufshcd_config_vreg_load(struct device *dev, struct ufs_vreg *vreg,
 				   int ua)
 {
-	int ret = 0;
-	struct regulator *reg = vreg->reg;
-	const char *name = vreg->name;
+	int ret;
 
-	BUG_ON(!vreg);
+	if (!vreg)
+		return 0;
 
-	ret = regulator_set_optimum_mode(reg, ua);
-	if (ret >= 0) {
-		/*
-		 * regulator_set_optimum_mode() returns new regulator
-		 * mode upon success.
-		 */
-		ret = 0;
-	} else {
-		dev_err(dev, "%s: %s set optimum mode(ua=%d) failed, err=%d\n",
-				__func__, name, ua, ret);
+	ret = regulator_set_load(vreg->reg, ua);
+	if (ret < 0) {
+		dev_err(dev, "%s: %s set load (ua=%d) failed, err=%d\n",
+				__func__, vreg->name, ua, ret);
 	}
 
 	return ret;
@@ -4249,18 +4242,12 @@
 static inline int ufshcd_config_vreg_lpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	if (!vreg)
-		return 0;
-
 	return ufshcd_config_vreg_load(hba->dev, vreg, UFS_VREG_LPM_LOAD_UA);
 }
 
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	if (!vreg)
-		return 0;
-
 	return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA);
 }
 
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index ab8dfbe..198f96b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -159,10 +159,9 @@
 
 config SPI_CADENCE
 	tristate "Cadence SPI controller"
-	depends on ARM
 	help
 	  This selects the Cadence SPI controller master driver
-	  used by Xilinx Zynq.
+	  used by Xilinx Zynq and ZynqMP.
 
 config SPI_CLPS711X
 	tristate "CLPS711X host SPI controller"
@@ -632,7 +631,7 @@
 
 config SPI_DW_MID_DMA
 	bool "DMA support for DW SPI controller on Intel MID platform"
-	depends on SPI_DW_PCI && INTEL_MID_DMAC
+	depends on SPI_DW_PCI && DW_DMAC_PCI
 
 config SPI_DW_MMIO
 	tristate "Memory-mapped io interface driver for DW SPI core"
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 06de340..a2f40b1 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -180,11 +180,17 @@
 	  | SPI_BF(name, value))
 
 /* Register access macros */
+#ifdef CONFIG_AVR32
 #define spi_readl(port, reg) \
 	__raw_readl((port)->regs + SPI_##reg)
 #define spi_writel(port, reg, value) \
 	__raw_writel((value), (port)->regs + SPI_##reg)
-
+#else
+#define spi_readl(port, reg) \
+	readl_relaxed((port)->regs + SPI_##reg)
+#define spi_writel(port, reg, value) \
+	writel_relaxed((value), (port)->regs + SPI_##reg)
+#endif
 /* use PIO for small transfers, avoiding DMA setup/teardown overhead and
  * cache operations; better heuristics consider wordsize and bitrate.
  */
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 419a782..f63864a 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2012 Chris Boot
  * Copyright (C) 2013 Stephen Warren
+ * Copyright (C) 2015 Martin Sperl
  *
  * This driver is inspired by:
  * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
@@ -29,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
 
@@ -66,8 +68,10 @@
 #define BCM2835_SPI_CS_CS_10		0x00000002
 #define BCM2835_SPI_CS_CS_01		0x00000001
 
-#define BCM2835_SPI_TIMEOUT_MS	30000
-#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS)
+#define BCM2835_SPI_POLLING_LIMIT_US	30
+#define BCM2835_SPI_TIMEOUT_MS		30000
+#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
+				| SPI_NO_CS | SPI_3WIRE)
 
 #define DRV_NAME	"spi-bcm2835"
 
@@ -75,10 +79,10 @@
 	void __iomem *regs;
 	struct clk *clk;
 	int irq;
-	struct completion done;
 	const u8 *tx_buf;
 	u8 *rx_buf;
-	int len;
+	int tx_len;
+	int rx_len;
 };
 
 static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -91,205 +95,315 @@
 	writel(val, bs->regs + reg);
 }
 
-static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs)
 {
 	u8 byte;
 
-	while (len--) {
+	while ((bs->rx_len) &&
+	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) {
 		byte = bcm2835_rd(bs, BCM2835_SPI_FIFO);
 		if (bs->rx_buf)
 			*bs->rx_buf++ = byte;
+		bs->rx_len--;
 	}
 }
 
-static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs)
 {
 	u8 byte;
 
-	if (len > bs->len)
-		len = bs->len;
-
-	while (len--) {
+	while ((bs->tx_len) &&
+	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) {
 		byte = bs->tx_buf ? *bs->tx_buf++ : 0;
 		bcm2835_wr(bs, BCM2835_SPI_FIFO, byte);
-		bs->len--;
+		bs->tx_len--;
 	}
 }
 
+static void bcm2835_spi_reset_hw(struct spi_master *master)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/* Disable SPI interrupts and transfer */
+	cs &= ~(BCM2835_SPI_CS_INTR |
+		BCM2835_SPI_CS_INTD |
+		BCM2835_SPI_CS_TA);
+	/* and reset RX/TX FIFOS */
+	cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
+
+	/* and reset the SPI_HW */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
+
 static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
 	struct spi_master *master = dev_id;
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
-	/*
-	 * RXR - RX needs Reading. This means 12 (or more) bytes have been
-	 * transmitted and hence 12 (or more) bytes have been received.
-	 *
-	 * The FIFO is 16-bytes deep. We check for this interrupt to keep the
-	 * FIFO full; we have a 4-byte-time buffer for IRQ latency. We check
-	 * this before DONE (TX empty) just in case we delayed processing this
-	 * interrupt for some reason.
-	 *
-	 * We only check for this case if we have more bytes to TX; at the end
-	 * of the transfer, we ignore this pipelining optimization, and let
-	 * bcm2835_spi_finish_transfer() drain the RX FIFO.
-	 */
-	if (bs->len && (cs & BCM2835_SPI_CS_RXR)) {
-		/* Read 12 bytes of data */
-		bcm2835_rd_fifo(bs, 12);
+	/* Read as many bytes as possible from FIFO */
+	bcm2835_rd_fifo(bs);
+	/* Write as many bytes as possible to FIFO */
+	bcm2835_wr_fifo(bs);
 
-		/* Write up to 12 bytes */
-		bcm2835_wr_fifo(bs, 12);
-
-		/*
-		 * We must have written something to the TX FIFO due to the
-		 * bs->len check above, so cannot be DONE. Hence, return
-		 * early. Note that DONE could also be set if we serviced an
-		 * RXR interrupt really late.
-		 */
-		return IRQ_HANDLED;
+	/* based on flags decide if we can finish the transfer */
+	if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
+		/* Transfer complete - reset SPI HW */
+		bcm2835_spi_reset_hw(master);
+		/* wake up the framework */
+		complete(&master->xfer_completion);
 	}
 
-	/*
-	 * DONE - TX empty. This occurs when we first enable the transfer
-	 * since we do not pre-fill the TX FIFO. At any other time, given that
-	 * we refill the TX FIFO above based on RXR, and hence ignore DONE if
-	 * RXR is set, DONE really does mean end-of-transfer.
-	 */
-	if (cs & BCM2835_SPI_CS_DONE) {
-		if (bs->len) { /* First interrupt in a transfer */
-			bcm2835_wr_fifo(bs, 16);
-		} else { /* Transfer complete */
-			/* Disable SPI interrupts */
-			cs &= ~(BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD);
-			bcm2835_wr(bs, BCM2835_SPI_CS, cs);
-
-			/*
-			 * Wake up bcm2835_spi_transfer_one(), which will call
-			 * bcm2835_spi_finish_transfer(), to drain the RX FIFO.
-			 */
-			complete(&bs->done);
-		}
-
-		return IRQ_HANDLED;
-	}
-
-	return IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
-static int bcm2835_spi_start_transfer(struct spi_device *spi,
-		struct spi_transfer *tfr)
+static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
+					 struct spi_device *spi,
+					 struct spi_transfer *tfr,
+					 u32 cs,
+					 unsigned long xfer_time_us)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
-	unsigned long spi_hz, clk_hz, cdiv;
-	u32 cs = BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	unsigned long timeout = jiffies +
+		max(4 * xfer_time_us * HZ / 1000000, 2uL);
 
-	spi_hz = tfr->speed_hz;
-	clk_hz = clk_get_rate(bs->clk);
+	/* enable HW block without interrupts */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
 
-	if (spi_hz >= clk_hz / 2) {
-		cdiv = 2; /* clk_hz/2 is the fastest we can go */
-	} else if (spi_hz) {
-		/* CDIV must be a power of two */
-		cdiv = roundup_pow_of_two(DIV_ROUND_UP(clk_hz, spi_hz));
-
-		if (cdiv >= 65536)
-			cdiv = 0; /* 0 is the slowest we can go */
-	} else
-		cdiv = 0; /* 0 is the slowest we can go */
-
-	if (spi->mode & SPI_CPOL)
-		cs |= BCM2835_SPI_CS_CPOL;
-	if (spi->mode & SPI_CPHA)
-		cs |= BCM2835_SPI_CS_CPHA;
-
-	if (!(spi->mode & SPI_NO_CS)) {
-		if (spi->mode & SPI_CS_HIGH) {
-			cs |= BCM2835_SPI_CS_CSPOL;
-			cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+	/* set timeout to 4x the expected time, or 2 jiffies */
+	/* loop until finished the transfer */
+	while (bs->rx_len) {
+		/* read from fifo as much as possible */
+		bcm2835_rd_fifo(bs);
+		/* fill in tx fifo as much as possible */
+		bcm2835_wr_fifo(bs);
+		/* if we still expect some data after the read,
+		 * check for a possible timeout
+		 */
+		if (bs->rx_len && time_after(jiffies, timeout)) {
+			/* Transfer complete - reset SPI HW */
+			bcm2835_spi_reset_hw(master);
+			/* and return timeout */
+			return -ETIMEDOUT;
 		}
-
-		cs |= spi->chip_select;
 	}
 
-	reinit_completion(&bs->done);
-	bs->tx_buf = tfr->tx_buf;
-	bs->rx_buf = tfr->rx_buf;
-	bs->len = tfr->len;
+	/* Transfer complete - reset SPI HW */
+	bcm2835_spi_reset_hw(master);
+	/* and return without waiting for completion */
+	return 0;
+}
 
-	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
+static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
+					struct spi_device *spi,
+					struct spi_transfer *tfr,
+					u32 cs)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+
+	/* fill in fifo if we have gpio-cs
+	 * note that there have been rare events where the native-CS
+	 * flapped for <1us which may change the behaviour
+	 * with gpio-cs this does not happen, so it is implemented
+	 * only for this case
+	 */
+	if (gpio_is_valid(spi->cs_gpio)) {
+		/* enable HW block, but without interrupts enabled
+		 * this would triggern an immediate interrupt
+		 */
+		bcm2835_wr(bs, BCM2835_SPI_CS,
+			   cs | BCM2835_SPI_CS_TA);
+		/* fill in tx fifo as much as possible */
+		bcm2835_wr_fifo(bs);
+	}
+
 	/*
 	 * Enable the HW block. This will immediately trigger a DONE (TX
 	 * empty) interrupt, upon which we will fill the TX FIFO with the
 	 * first TX bytes. Pre-filling the TX FIFO here to avoid the
 	 * interrupt doesn't work:-(
 	 */
+	cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
-	return 0;
-}
-
-static int bcm2835_spi_finish_transfer(struct spi_device *spi,
-		struct spi_transfer *tfr, bool cs_change)
-{
-	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-
-	/* Drain RX FIFO */
-	while (cs & BCM2835_SPI_CS_RXD) {
-		bcm2835_rd_fifo(bs, 1);
-		cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-	}
-
-	if (tfr->delay_usecs)
-		udelay(tfr->delay_usecs);
-
-	if (cs_change)
-		/* Clear TA flag */
-		bcm2835_wr(bs, BCM2835_SPI_CS, cs & ~BCM2835_SPI_CS_TA);
-
-	return 0;
+	/* signal that we need to wait for completion */
+	return 1;
 }
 
 static int bcm2835_spi_transfer_one(struct spi_master *master,
-		struct spi_message *mesg)
+				    struct spi_device *spi,
+				    struct spi_transfer *tfr)
 {
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	struct spi_transfer *tfr;
-	struct spi_device *spi = mesg->spi;
-	int err = 0;
-	unsigned int timeout;
-	bool cs_change;
+	unsigned long spi_hz, clk_hz, cdiv;
+	unsigned long spi_used_hz, xfer_time_us;
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
-	list_for_each_entry(tfr, &mesg->transfers, transfer_list) {
-		err = bcm2835_spi_start_transfer(spi, tfr);
-		if (err)
-			goto out;
+	/* set clock */
+	spi_hz = tfr->speed_hz;
+	clk_hz = clk_get_rate(bs->clk);
 
-		timeout = wait_for_completion_timeout(&bs->done,
-				msecs_to_jiffies(BCM2835_SPI_TIMEOUT_MS));
-		if (!timeout) {
-			err = -ETIMEDOUT;
-			goto out;
-		}
+	if (spi_hz >= clk_hz / 2) {
+		cdiv = 2; /* clk_hz/2 is the fastest we can go */
+	} else if (spi_hz) {
+		/* CDIV must be a multiple of two */
+		cdiv = DIV_ROUND_UP(clk_hz, spi_hz);
+		cdiv += (cdiv % 2);
 
-		cs_change = tfr->cs_change ||
-			list_is_last(&tfr->transfer_list, &mesg->transfers);
+		if (cdiv >= 65536)
+			cdiv = 0; /* 0 is the slowest we can go */
+	} else {
+		cdiv = 0; /* 0 is the slowest we can go */
+	}
+	spi_used_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536);
+	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
 
-		err = bcm2835_spi_finish_transfer(spi, tfr, cs_change);
-		if (err)
-			goto out;
+	/* handle all the modes */
+	if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf))
+		cs |= BCM2835_SPI_CS_REN;
+	if (spi->mode & SPI_CPOL)
+		cs |= BCM2835_SPI_CS_CPOL;
+	if (spi->mode & SPI_CPHA)
+		cs |= BCM2835_SPI_CS_CPHA;
 
-		mesg->actual_length += (tfr->len - bs->len);
+	/* for gpio_cs set dummy CS so that no HW-CS get changed
+	 * we can not run this in bcm2835_spi_set_cs, as it does
+	 * not get called for cs_gpio cases, so we need to do it here
+	 */
+	if (gpio_is_valid(spi->cs_gpio) || (spi->mode & SPI_NO_CS))
+		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+
+	/* set transmit buffers and length */
+	bs->tx_buf = tfr->tx_buf;
+	bs->rx_buf = tfr->rx_buf;
+	bs->tx_len = tfr->len;
+	bs->rx_len = tfr->len;
+
+	/* calculate the estimated time in us the transfer runs */
+	xfer_time_us = tfr->len
+		* 9 /* clocks/byte - SPI-HW waits 1 clock after each byte */
+		* 1000000 / spi_used_hz;
+
+	/* for short requests run polling*/
+	if (xfer_time_us <= BCM2835_SPI_POLLING_LIMIT_US)
+		return bcm2835_spi_transfer_one_poll(master, spi, tfr,
+						     cs, xfer_time_us);
+
+	return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
+}
+
+static void bcm2835_spi_handle_err(struct spi_master *master,
+				   struct spi_message *msg)
+{
+	bcm2835_spi_reset_hw(master);
+}
+
+static void bcm2835_spi_set_cs(struct spi_device *spi, bool gpio_level)
+{
+	/*
+	 * we can assume that we are "native" as per spi_set_cs
+	 *   calling us ONLY when cs_gpio is not set
+	 * we can also assume that we are CS < 3 as per bcm2835_spi_setup
+	 *   we would not get called because of error handling there.
+	 * the level passed is the electrical level not enabled/disabled
+	 *   so it has to get translated back to enable/disable
+	 *   see spi_set_cs in spi.c for the implementation
+	 */
+
+	struct spi_master *master = spi->master;
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	bool enable;
+
+	/* calculate the enable flag from the passed gpio_level */
+	enable = (spi->mode & SPI_CS_HIGH) ? gpio_level : !gpio_level;
+
+	/* set flags for "reverse" polarity in the registers */
+	if (spi->mode & SPI_CS_HIGH) {
+		/* set the correct CS-bits */
+		cs |= BCM2835_SPI_CS_CSPOL;
+		cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+	} else {
+		/* clean the CS-bits */
+		cs &= ~BCM2835_SPI_CS_CSPOL;
+		cs &= ~(BCM2835_SPI_CS_CSPOL0 << spi->chip_select);
 	}
 
-out:
-	/* Clear FIFOs, and disable the HW block */
-	bcm2835_wr(bs, BCM2835_SPI_CS,
-		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
-	mesg->status = err;
-	spi_finalize_current_message(master);
+	/* select the correct chip_select depending on disabled/enabled */
+	if (enable) {
+		/* set cs correctly */
+		if (spi->mode & SPI_NO_CS) {
+			/* use the "undefined" chip-select */
+			cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+		} else {
+			/* set the chip select */
+			cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01);
+			cs |= spi->chip_select;
+		}
+	} else {
+		/* disable CSPOL which puts HW-CS into deselected state */
+		cs &= ~BCM2835_SPI_CS_CSPOL;
+		/* use the "undefined" chip-select as precaution */
+		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+	}
+
+	/* finally set the calculated flags in SPI_CS */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
+
+static int chip_match_name(struct gpio_chip *chip, void *data)
+{
+	return !strcmp(chip->label, data);
+}
+
+static int bcm2835_spi_setup(struct spi_device *spi)
+{
+	int err;
+	struct gpio_chip *chip;
+	/*
+	 * sanity checking the native-chipselects
+	 */
+	if (spi->mode & SPI_NO_CS)
+		return 0;
+	if (gpio_is_valid(spi->cs_gpio))
+		return 0;
+	if (spi->chip_select > 1) {
+		/* error in the case of native CS requested with CS > 1
+		 * officially there is a CS2, but it is not documented
+		 * which GPIO is connected with that...
+		 */
+		dev_err(&spi->dev,
+			"setup: only two native chip-selects are supported\n");
+		return -EINVAL;
+	}
+	/* now translate native cs to GPIO */
+
+	/* get the gpio chip for the base */
+	chip = gpiochip_find("pinctrl-bcm2835", chip_match_name);
+	if (!chip)
+		return 0;
+
+	/* and calculate the real CS */
+	spi->cs_gpio = chip->base + 8 - spi->chip_select;
+
+	/* and set up the "mode" and level */
+	dev_info(&spi->dev, "setting up native-CS%i as GPIO %i\n",
+		 spi->chip_select, spi->cs_gpio);
+
+	/* set up GPIO as output and pull to the correct level */
+	err = gpio_direction_output(spi->cs_gpio,
+				    (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+	if (err) {
+		dev_err(&spi->dev,
+			"could not set CS%i gpio %i as output: %i",
+			spi->chip_select, spi->cs_gpio, err);
+		return err;
+	}
+	/* the implementation of pinctrl-bcm2835 currently does not
+	 * set the GPIO value when using gpio_direction_output
+	 * so we are setting it here explicitly
+	 */
+	gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
 
 	return 0;
 }
@@ -312,13 +426,14 @@
 	master->mode_bits = BCM2835_SPI_MODE_BITS;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->num_chipselect = 3;
-	master->transfer_one_message = bcm2835_spi_transfer_one;
+	master->setup = bcm2835_spi_setup;
+	master->set_cs = bcm2835_spi_set_cs;
+	master->transfer_one = bcm2835_spi_transfer_one;
+	master->handle_err = bcm2835_spi_handle_err;
 	master->dev.of_node = pdev->dev.of_node;
 
 	bs = spi_master_get_devdata(master);
 
-	init_completion(&bs->done);
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bs->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(bs->regs)) {
@@ -343,13 +458,13 @@
 	clk_prepare_enable(bs->clk);
 
 	err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
-				dev_name(&pdev->dev), master);
+			       dev_name(&pdev->dev), master);
 	if (err) {
 		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
 		goto out_clk_disable;
 	}
 
-	/* initialise the hardware */
+	/* initialise the hardware with the default polarities */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
diff --git a/drivers/spi/spi-bcm53xx.c b/drivers/spi/spi-bcm53xx.c
index 3fb91c8..1520554 100644
--- a/drivers/spi/spi-bcm53xx.c
+++ b/drivers/spi/spi-bcm53xx.c
@@ -44,7 +44,7 @@
 	u32 tmp;
 
 	/* SPE bit has to be 0 before we read MSPI STATUS */
-	deadline = jiffies + BCM53XXSPI_SPE_TIMEOUT_MS * HZ / 1000;
+	deadline = jiffies + msecs_to_jiffies(BCM53XXSPI_SPE_TIMEOUT_MS);
 	do {
 		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
 		if (!(tmp & B53SPI_MSPI_SPCR2_SPE))
@@ -56,7 +56,7 @@
 		goto spi_timeout;
 
 	/* Check status */
-	deadline = jiffies + timeout_ms * HZ / 1000;
+	deadline = jiffies + msecs_to_jiffies(timeout_ms);
 	do {
 		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_MSPI_STATUS);
 		if (tmp & B53SPI_MSPI_MSPI_STATUS_SPIF) {
diff --git a/drivers/spi/spi-bfin5xx.c b/drivers/spi/spi-bfin5xx.c
index 3707993..a3d65b4 100644
--- a/drivers/spi/spi-bfin5xx.c
+++ b/drivers/spi/spi-bfin5xx.c
@@ -559,7 +559,7 @@
 	struct spi_transfer *previous = NULL;
 	struct bfin_spi_slave_data *chip = NULL;
 	unsigned int bits_per_word;
-	u16 cr, cr_width, dma_width, dma_config;
+	u16 cr, cr_width = 0, dma_width, dma_config;
 	u32 tranf_success = 1;
 	u8 full_duplex = 0;
 
@@ -648,7 +648,6 @@
 	} else if (bits_per_word == 8) {
 		drv_data->n_bytes = bits_per_word/8;
 		drv_data->len = transfer->len;
-		cr_width = 0;
 		drv_data->ops = &bfin_bfin_spi_transfer_ops_u8;
 	}
 	cr = bfin_read(&drv_data->regs->ctl) & ~(BIT_CTL_TIMOD | BIT_CTL_WORDSIZE);
diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index c616e41..06b34e5 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -49,12 +49,17 @@
 {
 	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
 
+	bool oldbit = !(word & 1);
 	/* clock starts at inactive polarity */
 	for (word <<= (32 - bits); likely(bits); bits--) {
 
 		/* setup MSB (to slave) on trailing edge */
-		if ((flags & SPI_MASTER_NO_TX) == 0)
-			setmosi(spi, word & (1 << 31));
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & (1 << 31)) != oldbit) {
+				setmosi(spi, word & (1 << 31));
+				oldbit = word & (1 << 31);
+			}
+		}
 		spidelay(nsecs);	/* T(setup) */
 
 		setsck(spi, !cpol);
@@ -76,13 +81,18 @@
 {
 	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
 
+	bool oldbit = !(word & (1 << 31));
 	/* clock starts at inactive polarity */
 	for (word <<= (32 - bits); likely(bits); bits--) {
 
 		/* setup MSB (to slave) on leading edge */
 		setsck(spi, !cpol);
-		if ((flags & SPI_MASTER_NO_TX) == 0)
-			setmosi(spi, word & (1 << 31));
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & (1 << 31)) != oldbit) {
+				setmosi(spi, word & (1 << 31));
+				oldbit = word & (1 << 31);
+			}
+		}
 		spidelay(nsecs); /* T(setup) */
 
 		setsck(spi, cpol);
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 4f8c798..bb1052e 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -23,29 +23,31 @@
 #include "spi-dw.h"
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-#include <linux/intel_mid_dma.h>
 #include <linux/pci.h>
+#include <linux/platform_data/dma-dw.h>
 
 #define RX_BUSY		0
 #define TX_BUSY		1
 
-struct mid_dma {
-	struct intel_mid_dma_slave	dmas_tx;
-	struct intel_mid_dma_slave	dmas_rx;
-};
+static struct dw_dma_slave mid_dma_tx = { .dst_id = 1 };
+static struct dw_dma_slave mid_dma_rx = { .src_id = 0 };
 
 static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
 {
-	struct dw_spi *dws = param;
+	struct dw_dma_slave *s = param;
 
-	return dws->dma_dev == chan->device->dev;
+	if (s->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = s;
+	return true;
 }
 
 static int mid_spi_dma_init(struct dw_spi *dws)
 {
-	struct mid_dma *dw_dma = dws->dma_priv;
 	struct pci_dev *dma_dev;
-	struct intel_mid_dma_slave *rxs, *txs;
+	struct dw_dma_slave *tx = dws->dma_tx;
+	struct dw_dma_slave *rx = dws->dma_rx;
 	dma_cap_mask_t mask;
 
 	/*
@@ -56,28 +58,22 @@
 	if (!dma_dev)
 		return -ENODEV;
 
-	dws->dma_dev = &dma_dev->dev;
-
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
 	/* 1. Init rx channel */
-	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	rx->dma_dev = &dma_dev->dev;
+	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, rx);
 	if (!dws->rxchan)
 		goto err_exit;
-	rxs = &dw_dma->dmas_rx;
-	rxs->hs_mode = LNW_DMA_HW_HS;
-	rxs->cfg_mode = LNW_DMA_PER_TO_MEM;
-	dws->rxchan->private = rxs;
+	dws->master->dma_rx = dws->rxchan;
 
 	/* 2. Init tx channel */
-	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	tx->dma_dev = &dma_dev->dev;
+	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, tx);
 	if (!dws->txchan)
 		goto free_rxchan;
-	txs = &dw_dma->dmas_tx;
-	txs->hs_mode = LNW_DMA_HW_HS;
-	txs->cfg_mode = LNW_DMA_MEM_TO_PER;
-	dws->txchan->private = txs;
+	dws->master->dma_tx = dws->txchan;
 
 	dws->dma_inited = 1;
 	return 0;
@@ -100,6 +96,42 @@
 	dma_release_channel(dws->rxchan);
 }
 
+static irqreturn_t dma_transfer(struct dw_spi *dws)
+{
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+
+	if (!irq_status)
+		return IRQ_NONE;
+
+	dw_readl(dws, DW_SPI_ICR);
+	spi_reset_chip(dws);
+
+	dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
+	dws->master->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(dws->master);
+	return IRQ_HANDLED;
+}
+
+static bool mid_spi_can_dma(struct spi_master *master, struct spi_device *spi,
+		struct spi_transfer *xfer)
+{
+	struct dw_spi *dws = spi_master_get_devdata(master);
+
+	if (!dws->dma_inited)
+		return false;
+
+	return xfer->len > dws->fifo_len;
+}
+
+static enum dma_slave_buswidth convert_dma_width(u32 dma_width) {
+	if (dma_width == 1)
+		return DMA_SLAVE_BUSWIDTH_1_BYTE;
+	else if (dma_width == 2)
+		return DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+}
+
 /*
  * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
  * channel will clear a corresponding bit.
@@ -111,33 +143,30 @@
 	clear_bit(TX_BUSY, &dws->dma_chan_busy);
 	if (test_bit(RX_BUSY, &dws->dma_chan_busy))
 		return;
-	dw_spi_xfer_done(dws);
+	spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
+		struct spi_transfer *xfer)
 {
 	struct dma_slave_config txconf;
 	struct dma_async_tx_descriptor *txdesc;
 
-	if (!dws->tx_dma)
+	if (!xfer->tx_buf)
 		return NULL;
 
 	txconf.direction = DMA_MEM_TO_DEV;
 	txconf.dst_addr = dws->dma_addr;
-	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+	txconf.dst_maxburst = 16;
 	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	txconf.dst_addr_width = dws->dma_width;
+	txconf.dst_addr_width = convert_dma_width(dws->dma_width);
 	txconf.device_fc = false;
 
 	dmaengine_slave_config(dws->txchan, &txconf);
 
-	memset(&dws->tx_sgl, 0, sizeof(dws->tx_sgl));
-	dws->tx_sgl.dma_address = dws->tx_dma;
-	dws->tx_sgl.length = dws->len;
-
 	txdesc = dmaengine_prep_slave_sg(dws->txchan,
-				&dws->tx_sgl,
-				1,
+				xfer->tx_sg.sgl,
+				xfer->tx_sg.nents,
 				DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!txdesc)
@@ -160,33 +189,30 @@
 	clear_bit(RX_BUSY, &dws->dma_chan_busy);
 	if (test_bit(TX_BUSY, &dws->dma_chan_busy))
 		return;
-	dw_spi_xfer_done(dws);
+	spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
+		struct spi_transfer *xfer)
 {
 	struct dma_slave_config rxconf;
 	struct dma_async_tx_descriptor *rxdesc;
 
-	if (!dws->rx_dma)
+	if (!xfer->rx_buf)
 		return NULL;
 
 	rxconf.direction = DMA_DEV_TO_MEM;
 	rxconf.src_addr = dws->dma_addr;
-	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+	rxconf.src_maxburst = 16;
 	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	rxconf.src_addr_width = dws->dma_width;
+	rxconf.src_addr_width = convert_dma_width(dws->dma_width);
 	rxconf.device_fc = false;
 
 	dmaengine_slave_config(dws->rxchan, &rxconf);
 
-	memset(&dws->rx_sgl, 0, sizeof(dws->rx_sgl));
-	dws->rx_sgl.dma_address = dws->rx_dma;
-	dws->rx_sgl.length = dws->len;
-
 	rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
-				&dws->rx_sgl,
-				1,
+				xfer->rx_sg.sgl,
+				xfer->rx_sg.nents,
 				DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!rxdesc)
@@ -198,37 +224,36 @@
 	return rxdesc;
 }
 
-static void dw_spi_dma_setup(struct dw_spi *dws)
+static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 {
 	u16 dma_ctrl = 0;
 
-	spi_enable_chip(dws, 0);
+	dw_writel(dws, DW_SPI_DMARDLR, 0xf);
+	dw_writel(dws, DW_SPI_DMATDLR, 0x10);
 
-	dw_writew(dws, DW_SPI_DMARDLR, 0xf);
-	dw_writew(dws, DW_SPI_DMATDLR, 0x10);
-
-	if (dws->tx_dma)
+	if (xfer->tx_buf)
 		dma_ctrl |= SPI_DMA_TDMAE;
-	if (dws->rx_dma)
+	if (xfer->rx_buf)
 		dma_ctrl |= SPI_DMA_RDMAE;
-	dw_writew(dws, DW_SPI_DMACR, dma_ctrl);
+	dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
 
-	spi_enable_chip(dws, 1);
+	/* Set the interrupt mask */
+	spi_umask_intr(dws, SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI);
+
+	dws->transfer_handler = dma_transfer;
+
+	return 0;
 }
 
-static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
+static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 {
 	struct dma_async_tx_descriptor *txdesc, *rxdesc;
 
-	/* 1. setup DMA related registers */
-	if (cs_change)
-		dw_spi_dma_setup(dws);
+	/* Prepare the TX dma transfer */
+	txdesc = dw_spi_dma_prepare_tx(dws, xfer);
 
-	/* 2. Prepare the TX dma transfer */
-	txdesc = dw_spi_dma_prepare_tx(dws);
-
-	/* 3. Prepare the RX dma transfer */
-	rxdesc = dw_spi_dma_prepare_rx(dws);
+	/* Prepare the RX dma transfer */
+	rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
 
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
@@ -246,10 +271,25 @@
 	return 0;
 }
 
+static void mid_spi_dma_stop(struct dw_spi *dws)
+{
+	if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_all(dws->txchan);
+		clear_bit(TX_BUSY, &dws->dma_chan_busy);
+	}
+	if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_all(dws->rxchan);
+		clear_bit(RX_BUSY, &dws->dma_chan_busy);
+	}
+}
+
 static struct dw_spi_dma_ops mid_dma_ops = {
 	.dma_init	= mid_spi_dma_init,
 	.dma_exit	= mid_spi_dma_exit,
+	.dma_setup	= mid_spi_dma_setup,
+	.can_dma	= mid_spi_can_dma,
 	.dma_transfer	= mid_spi_dma_transfer,
+	.dma_stop	= mid_spi_dma_stop,
 };
 #endif
 
@@ -282,9 +322,8 @@
 	iounmap(clk_reg);
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-	dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL);
-	if (!dws->dma_priv)
-		return -ENOMEM;
+	dws->dma_tx = &mid_dma_tx;
+	dws->dma_rx = &mid_dma_rx;
 	dws->dma_ops = &mid_dma_ops;
 #endif
 	return 0;
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index 4847afb..8d67d03 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -28,11 +28,6 @@
 #include <linux/debugfs.h>
 #endif
 
-#define START_STATE	((void *)0)
-#define RUNNING_STATE	((void *)1)
-#define DONE_STATE	((void *)2)
-#define ERROR_STATE	((void *)-1)
-
 /* Slave spi_dev related */
 struct chip_data {
 	u16 cr0;
@@ -143,13 +138,26 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static void dw_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct chip_data *chip = spi_get_ctldata(spi);
+
+	/* Chip select logic is inverted from spi_set_cs() */
+	if (chip && chip->cs_control)
+		chip->cs_control(!enable);
+
+	if (!enable)
+		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
+}
+
 /* Return the max entries we can fill into tx fifo */
 static inline u32 tx_max(struct dw_spi *dws)
 {
 	u32 tx_left, tx_room, rxtx_gap;
 
 	tx_left = (dws->tx_end - dws->tx) / dws->n_bytes;
-	tx_room = dws->fifo_len - dw_readw(dws, DW_SPI_TXFLR);
+	tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
 
 	/*
 	 * Another concern is about the tx/rx mismatch, we
@@ -170,7 +178,7 @@
 {
 	u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes;
 
-	return min_t(u32, rx_left, dw_readw(dws, DW_SPI_RXFLR));
+	return min_t(u32, rx_left, dw_readl(dws, DW_SPI_RXFLR));
 }
 
 static void dw_writer(struct dw_spi *dws)
@@ -186,7 +194,7 @@
 			else
 				txw = *(u16 *)(dws->tx);
 		}
-		dw_writew(dws, DW_SPI_DR, txw);
+		dw_writel(dws, DW_SPI_DR, txw);
 		dws->tx += dws->n_bytes;
 	}
 }
@@ -197,7 +205,7 @@
 	u16 rxw;
 
 	while (max--) {
-		rxw = dw_readw(dws, DW_SPI_DR);
+		rxw = dw_readl(dws, DW_SPI_DR);
 		/* Care rx only if the transfer's original "rx" is not null */
 		if (dws->rx_end - dws->len) {
 			if (dws->n_bytes == 1)
@@ -209,103 +217,22 @@
 	}
 }
 
-static void *next_transfer(struct dw_spi *dws)
-{
-	struct spi_message *msg = dws->cur_msg;
-	struct spi_transfer *trans = dws->cur_transfer;
-
-	/* Move to next transfer */
-	if (trans->transfer_list.next != &msg->transfers) {
-		dws->cur_transfer =
-			list_entry(trans->transfer_list.next,
-					struct spi_transfer,
-					transfer_list);
-		return RUNNING_STATE;
-	}
-
-	return DONE_STATE;
-}
-
-/*
- * Note: first step is the protocol driver prepares
- * a dma-capable memory, and this func just need translate
- * the virt addr to physical
- */
-static int map_dma_buffers(struct dw_spi *dws)
-{
-	if (!dws->cur_msg->is_dma_mapped
-		|| !dws->dma_inited
-		|| !dws->cur_chip->enable_dma
-		|| !dws->dma_ops)
-		return 0;
-
-	if (dws->cur_transfer->tx_dma)
-		dws->tx_dma = dws->cur_transfer->tx_dma;
-
-	if (dws->cur_transfer->rx_dma)
-		dws->rx_dma = dws->cur_transfer->rx_dma;
-
-	return 1;
-}
-
-/* Caller already set message->status; dma and pio irqs are blocked */
-static void giveback(struct dw_spi *dws)
-{
-	struct spi_transfer *last_transfer;
-	struct spi_message *msg;
-
-	msg = dws->cur_msg;
-	dws->cur_msg = NULL;
-	dws->cur_transfer = NULL;
-	dws->prev_chip = dws->cur_chip;
-	dws->cur_chip = NULL;
-	dws->dma_mapped = 0;
-
-	last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
-					transfer_list);
-
-	if (!last_transfer->cs_change)
-		spi_chip_sel(dws, msg->spi, 0);
-
-	spi_finalize_current_message(dws->master);
-}
-
 static void int_error_stop(struct dw_spi *dws, const char *msg)
 {
-	/* Stop the hw */
-	spi_enable_chip(dws, 0);
+	spi_reset_chip(dws);
 
 	dev_err(&dws->master->dev, "%s\n", msg);
-	dws->cur_msg->state = ERROR_STATE;
-	tasklet_schedule(&dws->pump_transfers);
+	dws->master->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(dws->master);
 }
 
-void dw_spi_xfer_done(struct dw_spi *dws)
-{
-	/* Update total byte transferred return count actual bytes read */
-	dws->cur_msg->actual_length += dws->len;
-
-	/* Move to next transfer */
-	dws->cur_msg->state = next_transfer(dws);
-
-	/* Handle end of message */
-	if (dws->cur_msg->state == DONE_STATE) {
-		dws->cur_msg->status = 0;
-		giveback(dws);
-	} else
-		tasklet_schedule(&dws->pump_transfers);
-}
-EXPORT_SYMBOL_GPL(dw_spi_xfer_done);
-
 static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 {
-	u16 irq_status = dw_readw(dws, DW_SPI_ISR);
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
 
 	/* Error handling */
 	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
-		dw_readw(dws, DW_SPI_TXOICR);
-		dw_readw(dws, DW_SPI_RXOICR);
-		dw_readw(dws, DW_SPI_RXUICR);
+		dw_readl(dws, DW_SPI_ICR);
 		int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun");
 		return IRQ_HANDLED;
 	}
@@ -313,7 +240,7 @@
 	dw_reader(dws);
 	if (dws->rx_end == dws->rx) {
 		spi_mask_intr(dws, SPI_INT_TXEI);
-		dw_spi_xfer_done(dws);
+		spi_finalize_current_transfer(dws->master);
 		return IRQ_HANDLED;
 	}
 	if (irq_status & SPI_INT_TXEI) {
@@ -328,13 +255,14 @@
 
 static irqreturn_t dw_spi_irq(int irq, void *dev_id)
 {
-	struct dw_spi *dws = dev_id;
-	u16 irq_status = dw_readw(dws, DW_SPI_ISR) & 0x3f;
+	struct spi_master *master = dev_id;
+	struct dw_spi *dws = spi_master_get_devdata(master);
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR) & 0x3f;
 
 	if (!irq_status)
 		return IRQ_NONE;
 
-	if (!dws->cur_msg) {
+	if (!master->cur_msg) {
 		spi_mask_intr(dws, SPI_INT_TXEI);
 		return IRQ_HANDLED;
 	}
@@ -343,7 +271,7 @@
 }
 
 /* Must be called inside pump_transfers() */
-static void poll_transfer(struct dw_spi *dws)
+static int poll_transfer(struct dw_spi *dws)
 {
 	do {
 		dw_writer(dws);
@@ -351,64 +279,32 @@
 		cpu_relax();
 	} while (dws->rx_end > dws->rx);
 
-	dw_spi_xfer_done(dws);
+	return 0;
 }
 
-static void pump_transfers(unsigned long data)
+static int dw_spi_transfer_one(struct spi_master *master,
+		struct spi_device *spi, struct spi_transfer *transfer)
 {
-	struct dw_spi *dws = (struct dw_spi *)data;
-	struct spi_message *message = NULL;
-	struct spi_transfer *transfer = NULL;
-	struct spi_transfer *previous = NULL;
-	struct spi_device *spi = NULL;
-	struct chip_data *chip = NULL;
-	u8 bits = 0;
+	struct dw_spi *dws = spi_master_get_devdata(master);
+	struct chip_data *chip = spi_get_ctldata(spi);
 	u8 imask = 0;
-	u8 cs_change = 0;
-	u16 txint_level = 0;
+	u16 txlevel = 0;
 	u16 clk_div = 0;
 	u32 speed = 0;
 	u32 cr0 = 0;
+	int ret;
 
-	/* Get current state information */
-	message = dws->cur_msg;
-	transfer = dws->cur_transfer;
-	chip = dws->cur_chip;
-	spi = message->spi;
-
-	if (message->state == ERROR_STATE) {
-		message->status = -EIO;
-		goto early_exit;
-	}
-
-	/* Handle end of message */
-	if (message->state == DONE_STATE) {
-		message->status = 0;
-		goto early_exit;
-	}
-
-	/* Delay if requested at end of transfer */
-	if (message->state == RUNNING_STATE) {
-		previous = list_entry(transfer->transfer_list.prev,
-					struct spi_transfer,
-					transfer_list);
-		if (previous->delay_usecs)
-			udelay(previous->delay_usecs);
-	}
-
+	dws->dma_mapped = 0;
 	dws->n_bytes = chip->n_bytes;
 	dws->dma_width = chip->dma_width;
-	dws->cs_control = chip->cs_control;
 
-	dws->rx_dma = transfer->rx_dma;
-	dws->tx_dma = transfer->tx_dma;
 	dws->tx = (void *)transfer->tx_buf;
 	dws->tx_end = dws->tx + transfer->len;
 	dws->rx = transfer->rx_buf;
 	dws->rx_end = dws->rx + transfer->len;
-	dws->len = dws->cur_transfer->len;
-	if (chip != dws->prev_chip)
-		cs_change = 1;
+	dws->len = transfer->len;
+
+	spi_enable_chip(dws, 0);
 
 	cr0 = chip->cr0;
 
@@ -416,32 +312,37 @@
 	if (transfer->speed_hz) {
 		speed = chip->speed_hz;
 
-		if ((transfer->speed_hz != speed) || (!chip->clk_div)) {
+		if ((transfer->speed_hz != speed) || !chip->clk_div) {
 			speed = transfer->speed_hz;
 
 			/* clk_div doesn't support odd number */
-			clk_div = dws->max_freq / speed;
-			clk_div = (clk_div + 1) & 0xfffe;
+			clk_div = (dws->max_freq / speed + 1) & 0xfffe;
 
 			chip->speed_hz = speed;
 			chip->clk_div = clk_div;
+
+			spi_set_clk(dws, chip->clk_div);
 		}
 	}
 	if (transfer->bits_per_word) {
-		bits = transfer->bits_per_word;
-		dws->n_bytes = dws->dma_width = bits >> 3;
-		cr0 = (bits - 1)
+		if (transfer->bits_per_word == 8) {
+			dws->n_bytes = 1;
+			dws->dma_width = 1;
+		} else if (transfer->bits_per_word == 16) {
+			dws->n_bytes = 2;
+			dws->dma_width = 2;
+		}
+		cr0 = (transfer->bits_per_word - 1)
 			| (chip->type << SPI_FRF_OFFSET)
 			| (spi->mode << SPI_MODE_OFFSET)
 			| (chip->tmode << SPI_TMOD_OFFSET);
 	}
-	message->state = RUNNING_STATE;
 
 	/*
 	 * Adjust transfer mode if necessary. Requires platform dependent
 	 * chipselect mechanism.
 	 */
-	if (dws->cs_control) {
+	if (chip->cs_control) {
 		if (dws->rx && dws->tx)
 			chip->tmode = SPI_TMOD_TR;
 		else if (dws->rx)
@@ -453,80 +354,60 @@
 		cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
 	}
 
+	dw_writel(dws, DW_SPI_CTRL0, cr0);
+
 	/* Check if current transfer is a DMA transaction */
-	dws->dma_mapped = map_dma_buffers(dws);
+	if (master->can_dma && master->can_dma(master, spi, transfer))
+		dws->dma_mapped = master->cur_msg_mapped;
+
+	/* For poll mode just disable all interrupts */
+	spi_mask_intr(dws, 0xff);
 
 	/*
 	 * Interrupt mode
 	 * we only need set the TXEI IRQ, as TX/RX always happen syncronizely
 	 */
-	if (!dws->dma_mapped && !chip->poll_mode) {
-		int templen = dws->len / dws->n_bytes;
+	if (dws->dma_mapped) {
+		ret = dws->dma_ops->dma_setup(dws, transfer);
+		if (ret < 0) {
+			spi_enable_chip(dws, 1);
+			return ret;
+		}
+	} else if (!chip->poll_mode) {
+		txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
+		dw_writel(dws, DW_SPI_TXFLTR, txlevel);
 
-		txint_level = dws->fifo_len / 2;
-		txint_level = (templen > txint_level) ? txint_level : templen;
-
+		/* Set the interrupt mask */
 		imask |= SPI_INT_TXEI | SPI_INT_TXOI |
 			 SPI_INT_RXUI | SPI_INT_RXOI;
+		spi_umask_intr(dws, imask);
+
 		dws->transfer_handler = interrupt_transfer;
 	}
 
-	/*
-	 * Reprogram registers only if
-	 *	1. chip select changes
-	 *	2. clk_div is changed
-	 *	3. control value changes
-	 */
-	if (dw_readw(dws, DW_SPI_CTRL0) != cr0 || cs_change || clk_div || imask) {
-		spi_enable_chip(dws, 0);
+	spi_enable_chip(dws, 1);
 
-		if (dw_readw(dws, DW_SPI_CTRL0) != cr0)
-			dw_writew(dws, DW_SPI_CTRL0, cr0);
-
-		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
-		spi_chip_sel(dws, spi, 1);
-
-		/* Set the interrupt mask, for poll mode just disable all int */
-		spi_mask_intr(dws, 0xff);
-		if (imask)
-			spi_umask_intr(dws, imask);
-		if (txint_level)
-			dw_writew(dws, DW_SPI_TXFLTR, txint_level);
-
-		spi_enable_chip(dws, 1);
-		if (cs_change)
-			dws->prev_chip = chip;
+	if (dws->dma_mapped) {
+		ret = dws->dma_ops->dma_transfer(dws, transfer);
+		if (ret < 0)
+			return ret;
 	}
 
-	if (dws->dma_mapped)
-		dws->dma_ops->dma_transfer(dws, cs_change);
-
 	if (chip->poll_mode)
-		poll_transfer(dws);
+		return poll_transfer(dws);
 
-	return;
-
-early_exit:
-	giveback(dws);
+	return 1;
 }
 
-static int dw_spi_transfer_one_message(struct spi_master *master,
+static void dw_spi_handle_err(struct spi_master *master,
 		struct spi_message *msg)
 {
 	struct dw_spi *dws = spi_master_get_devdata(master);
 
-	dws->cur_msg = msg;
-	/* Initial message state */
-	dws->cur_msg->state = START_STATE;
-	dws->cur_transfer = list_entry(dws->cur_msg->transfers.next,
-						struct spi_transfer,
-						transfer_list);
-	dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi);
+	if (dws->dma_mapped)
+		dws->dma_ops->dma_stop(dws);
 
-	/* Launch transfers */
-	tasklet_schedule(&dws->pump_transfers);
-
-	return 0;
+	spi_reset_chip(dws);
 }
 
 /* This may be called twice for each spi dev */
@@ -561,8 +442,6 @@
 
 		chip->rx_threshold = 0;
 		chip->tx_threshold = 0;
-
-		chip->enable_dma = chip_info->enable_dma;
 	}
 
 	if (spi->bits_per_word == 8) {
@@ -610,9 +489,7 @@
 /* Restart the controller, disable all interrupts, clean rx fifo */
 static void spi_hw_init(struct device *dev, struct dw_spi *dws)
 {
-	spi_enable_chip(dws, 0);
-	spi_mask_intr(dws, 0xff);
-	spi_enable_chip(dws, 1);
+	spi_reset_chip(dws);
 
 	/*
 	 * Try to detect the FIFO depth if not set by interface driver,
@@ -622,11 +499,11 @@
 		u32 fifo;
 
 		for (fifo = 1; fifo < 256; fifo++) {
-			dw_writew(dws, DW_SPI_TXFLTR, fifo);
-			if (fifo != dw_readw(dws, DW_SPI_TXFLTR))
+			dw_writel(dws, DW_SPI_TXFLTR, fifo);
+			if (fifo != dw_readl(dws, DW_SPI_TXFLTR))
 				break;
 		}
-		dw_writew(dws, DW_SPI_TXFLTR, 0);
+		dw_writel(dws, DW_SPI_TXFLTR, 0);
 
 		dws->fifo_len = (fifo == 1) ? 0 : fifo;
 		dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
@@ -646,13 +523,12 @@
 
 	dws->master = master;
 	dws->type = SSI_MOTO_SPI;
-	dws->prev_chip = NULL;
 	dws->dma_inited = 0;
 	dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60);
 	snprintf(dws->name, sizeof(dws->name), "dw_spi%d", dws->bus_num);
 
 	ret = devm_request_irq(dev, dws->irq, dw_spi_irq, IRQF_SHARED,
-			dws->name, dws);
+			dws->name, master);
 	if (ret < 0) {
 		dev_err(&master->dev, "can not get IRQ\n");
 		goto err_free_master;
@@ -664,7 +540,9 @@
 	master->num_chipselect = dws->num_cs;
 	master->setup = dw_spi_setup;
 	master->cleanup = dw_spi_cleanup;
-	master->transfer_one_message = dw_spi_transfer_one_message;
+	master->set_cs = dw_spi_set_cs;
+	master->transfer_one = dw_spi_transfer_one;
+	master->handle_err = dw_spi_handle_err;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
 
@@ -676,11 +554,11 @@
 		if (ret) {
 			dev_warn(dev, "DMA init failed\n");
 			dws->dma_inited = 0;
+		} else {
+			master->can_dma = dws->dma_ops->can_dma;
 		}
 	}
 
-	tasklet_init(&dws->pump_transfers, pump_transfers, (unsigned long)dws);
-
 	spi_master_set_devdata(master, dws);
 	ret = devm_spi_register_master(dev, master);
 	if (ret) {
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 3d32be6..6c91391 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -91,12 +91,15 @@
 struct dw_spi_dma_ops {
 	int (*dma_init)(struct dw_spi *dws);
 	void (*dma_exit)(struct dw_spi *dws);
-	int (*dma_transfer)(struct dw_spi *dws, int cs_change);
+	int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
+	bool (*can_dma)(struct spi_master *master, struct spi_device *spi,
+			struct spi_transfer *xfer);
+	int (*dma_transfer)(struct dw_spi *dws, struct spi_transfer *xfer);
+	void (*dma_stop)(struct dw_spi *dws);
 };
 
 struct dw_spi {
 	struct spi_master	*master;
-	struct spi_device	*cur_dev;
 	enum dw_ssi_type	type;
 	char			name[16];
 
@@ -109,41 +112,26 @@
 	u16			bus_num;
 	u16			num_cs;		/* supported slave numbers */
 
-	/* Message Transfer pump */
-	struct tasklet_struct	pump_transfers;
-
 	/* Current message transfer state info */
-	struct spi_message	*cur_msg;
-	struct spi_transfer	*cur_transfer;
-	struct chip_data	*cur_chip;
-	struct chip_data	*prev_chip;
 	size_t			len;
 	void			*tx;
 	void			*tx_end;
 	void			*rx;
 	void			*rx_end;
 	int			dma_mapped;
-	dma_addr_t		rx_dma;
-	dma_addr_t		tx_dma;
-	size_t			rx_map_len;
-	size_t			tx_map_len;
 	u8			n_bytes;	/* current is a 1/2 bytes op */
-	u8			max_bits_per_word;	/* maxim is 16b */
 	u32			dma_width;
 	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
-	void			(*cs_control)(u32 command);
 
-	/* Dma info */
+	/* DMA info */
 	int			dma_inited;
 	struct dma_chan		*txchan;
-	struct scatterlist	tx_sgl;
 	struct dma_chan		*rxchan;
-	struct scatterlist	rx_sgl;
 	unsigned long		dma_chan_busy;
-	struct device		*dma_dev;
 	dma_addr_t		dma_addr; /* phy address of the Data register */
 	struct dw_spi_dma_ops	*dma_ops;
-	void			*dma_priv; /* platform relate info */
+	void			*dma_tx;
+	void			*dma_rx;
 
 	/* Bus interface info */
 	void			*priv;
@@ -162,16 +150,6 @@
 	__raw_writel(val, dws->regs + offset);
 }
 
-static inline u16 dw_readw(struct dw_spi *dws, u32 offset)
-{
-	return __raw_readw(dws->regs + offset);
-}
-
-static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val)
-{
-	__raw_writew(val, dws->regs + offset);
-}
-
 static inline void spi_enable_chip(struct dw_spi *dws, int enable)
 {
 	dw_writel(dws, DW_SPI_SSIENR, (enable ? 1 : 0));
@@ -182,22 +160,6 @@
 	dw_writel(dws, DW_SPI_BAUDR, div);
 }
 
-static inline void spi_chip_sel(struct dw_spi *dws, struct spi_device *spi,
-		int active)
-{
-	u16 cs = spi->chip_select;
-	int gpio_val = active ? (spi->mode & SPI_CS_HIGH) :
-		!(spi->mode & SPI_CS_HIGH);
-
-	if (dws->cs_control)
-		dws->cs_control(active);
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_set_value(spi->cs_gpio, gpio_val);
-
-	if (active)
-		dw_writel(dws, DW_SPI_SER, 1 << cs);
-}
-
 /* Disable IRQ bits */
 static inline void spi_mask_intr(struct dw_spi *dws, u32 mask)
 {
@@ -217,15 +179,26 @@
 }
 
 /*
+ * This does disable the SPI controller, interrupts, and re-enable the
+ * controller back. Transmit and receive FIFO buffers are cleared when the
+ * device is disabled.
+ */
+static inline void spi_reset_chip(struct dw_spi *dws)
+{
+	spi_enable_chip(dws, 0);
+	spi_mask_intr(dws, 0xff);
+	spi_enable_chip(dws, 1);
+}
+
+/*
  * Each SPI slave device to work with dw_api controller should
- * has such a structure claiming its working mode (PIO/DMA etc),
+ * has such a structure claiming its working mode (poll or PIO/DMA),
  * which can be save in the "controller_data" member of the
  * struct spi_device.
  */
 struct dw_spi_chip {
 	u8 poll_mode;	/* 1 for controller polling mode */
 	u8 type;	/* SPI/SSP/MicroWire */
-	u8 enable_dma;
 	void (*cs_control)(u32 command);
 };
 
@@ -233,7 +206,6 @@
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
-extern void dw_spi_xfer_done(struct dw_spi *dws);
 
 /* platform related setup */
 extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index d1a3924..5fe54cd 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -29,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
+#include <linux/time.h>
 
 #define DRIVER_NAME "fsl-dspi"
 
@@ -51,7 +53,7 @@
 #define SPI_CTAR_CPOL(x)	((x) << 26)
 #define SPI_CTAR_CPHA(x)	((x) << 25)
 #define SPI_CTAR_LSBFE(x)	((x) << 24)
-#define SPI_CTAR_PCSSCR(x)	(((x) & 0x00000003) << 22)
+#define SPI_CTAR_PCSSCK(x)	(((x) & 0x00000003) << 22)
 #define SPI_CTAR_PASC(x)	(((x) & 0x00000003) << 20)
 #define SPI_CTAR_PDT(x)	(((x) & 0x00000003) << 18)
 #define SPI_CTAR_PBR(x)	(((x) & 0x00000003) << 16)
@@ -59,6 +61,7 @@
 #define SPI_CTAR_ASC(x)	(((x) & 0x0000000f) << 8)
 #define SPI_CTAR_DT(x)		(((x) & 0x0000000f) << 4)
 #define SPI_CTAR_BR(x)		((x) & 0x0000000f)
+#define SPI_CTAR_SCALE_BITS	0xf
 
 #define SPI_CTAR0_SLAVE	0x0c
 
@@ -148,23 +151,66 @@
 		16,	32,	64,	128,
 		256,	512,	1024,	2048,
 		4096,	8192,	16384,	32768 };
-	int temp, i = 0, j = 0;
+	int scale_needed, scale, minscale = INT_MAX;
+	int i, j;
 
-	temp = clkrate / 2 / speed_hz;
+	scale_needed = clkrate / speed_hz;
+	if (clkrate % speed_hz)
+		scale_needed++;
 
-	for (i = 0; i < ARRAY_SIZE(pbr_tbl); i++)
-		for (j = 0; j < ARRAY_SIZE(brs); j++) {
-			if (pbr_tbl[i] * brs[j] >= temp) {
-				*pbr = i;
-				*br = j;
-				return;
+	for (i = 0; i < ARRAY_SIZE(brs); i++)
+		for (j = 0; j < ARRAY_SIZE(pbr_tbl); j++) {
+			scale = brs[i] * pbr_tbl[j];
+			if (scale >= scale_needed) {
+				if (scale < minscale) {
+					minscale = scale;
+					*br = i;
+					*pbr = j;
+				}
+				break;
 			}
 		}
 
-	pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld\
-		,we use the max prescaler value.\n", speed_hz, clkrate);
-	*pbr = ARRAY_SIZE(pbr_tbl) - 1;
-	*br =  ARRAY_SIZE(brs) - 1;
+	if (minscale == INT_MAX) {
+		pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld, we use the max prescaler value.\n",
+			speed_hz, clkrate);
+		*pbr = ARRAY_SIZE(pbr_tbl) - 1;
+		*br =  ARRAY_SIZE(brs) - 1;
+	}
+}
+
+static void ns_delay_scale(char *psc, char *sc, int delay_ns,
+		unsigned long clkrate)
+{
+	int pscale_tbl[4] = {1, 3, 5, 7};
+	int scale_needed, scale, minscale = INT_MAX;
+	int i, j;
+	u32 remainder;
+
+	scale_needed = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC,
+			&remainder);
+	if (remainder)
+		scale_needed++;
+
+	for (i = 0; i < ARRAY_SIZE(pscale_tbl); i++)
+		for (j = 0; j <= SPI_CTAR_SCALE_BITS; j++) {
+			scale = pscale_tbl[i] * (2 << j);
+			if (scale >= scale_needed) {
+				if (scale < minscale) {
+					minscale = scale;
+					*psc = i;
+					*sc = j;
+				}
+				break;
+			}
+		}
+
+	if (minscale == INT_MAX) {
+		pr_warn("Cannot find correct scale values for %dns delay at clkrate %ld, using max prescaler value",
+			delay_ns, clkrate);
+		*psc = ARRAY_SIZE(pscale_tbl) - 1;
+		*sc = SPI_CTAR_SCALE_BITS;
+	}
 }
 
 static int dspi_transfer_write(struct fsl_dspi *dspi)
@@ -345,7 +391,10 @@
 {
 	struct chip_data *chip;
 	struct fsl_dspi *dspi = spi_master_get_devdata(spi->master);
-	unsigned char br = 0, pbr = 0, fmsz = 0;
+	u32 cs_sck_delay = 0, sck_cs_delay = 0;
+	unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
+	unsigned char pasc = 0, asc = 0, fmsz = 0;
+	unsigned long clkrate;
 
 	if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) {
 		fmsz = spi->bits_per_word - 1;
@@ -362,18 +411,34 @@
 			return -ENOMEM;
 	}
 
+	of_property_read_u32(spi->dev.of_node, "fsl,spi-cs-sck-delay",
+			&cs_sck_delay);
+
+	of_property_read_u32(spi->dev.of_node, "fsl,spi-sck-cs-delay",
+			&sck_cs_delay);
+
 	chip->mcr_val = SPI_MCR_MASTER | SPI_MCR_PCSIS |
 		SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF;
 
 	chip->void_write_data = 0;
 
-	hz_to_spi_baud(&pbr, &br,
-			spi->max_speed_hz, clk_get_rate(dspi->clk));
+	clkrate = clk_get_rate(dspi->clk);
+	hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
+
+	/* Set PCS to SCK delay scale values */
+	ns_delay_scale(&pcssck, &cssck, cs_sck_delay, clkrate);
+
+	/* Set After SCK delay scale values */
+	ns_delay_scale(&pasc, &asc, sck_cs_delay, clkrate);
 
 	chip->ctar_val =  SPI_CTAR_FMSZ(fmsz)
 		| SPI_CTAR_CPOL(spi->mode & SPI_CPOL ? 1 : 0)
 		| SPI_CTAR_CPHA(spi->mode & SPI_CPHA ? 1 : 0)
 		| SPI_CTAR_LSBFE(spi->mode & SPI_LSB_FIRST ? 1 : 0)
+		| SPI_CTAR_PCSSCK(pcssck)
+		| SPI_CTAR_CSSCK(cssck)
+		| SPI_CTAR_PASC(pasc)
+		| SPI_CTAR_ASC(asc)
 		| SPI_CTAR_PBR(pbr)
 		| SPI_CTAR_BR(br);
 
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index e649bc7..788e2b1 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -122,36 +123,31 @@
 	spfi_writel(spfi, val, SPFI_CONTROL);
 }
 
-static inline void spfi_stop(struct img_spfi *spfi)
-{
-	u32 val;
-
-	val = spfi_readl(spfi, SPFI_CONTROL);
-	val &= ~SPFI_CONTROL_SPFI_EN;
-	spfi_writel(spfi, val, SPFI_CONTROL);
-}
-
 static inline void spfi_reset(struct img_spfi *spfi)
 {
 	spfi_writel(spfi, SPFI_CONTROL_SOFT_RESET, SPFI_CONTROL);
-	udelay(1);
 	spfi_writel(spfi, 0, SPFI_CONTROL);
 }
 
-static void spfi_flush_tx_fifo(struct img_spfi *spfi)
+static int spfi_wait_all_done(struct img_spfi *spfi)
 {
-	unsigned long timeout = jiffies + msecs_to_jiffies(10);
+	unsigned long timeout = jiffies + msecs_to_jiffies(50);
 
-	spfi_writel(spfi, SPFI_INTERRUPT_SDE, SPFI_INTERRUPT_CLEAR);
 	while (time_before(jiffies, timeout)) {
-		if (spfi_readl(spfi, SPFI_INTERRUPT_STATUS) &
-		    SPFI_INTERRUPT_SDE)
-			return;
+		u32 status = spfi_readl(spfi, SPFI_INTERRUPT_STATUS);
+
+		if (status & SPFI_INTERRUPT_ALLDONETRIG) {
+			spfi_writel(spfi, SPFI_INTERRUPT_ALLDONETRIG,
+				    SPFI_INTERRUPT_CLEAR);
+			return 0;
+		}
 		cpu_relax();
 	}
 
-	dev_err(spfi->dev, "Timed out waiting for FIFO to drain\n");
+	dev_err(spfi->dev, "Timed out waiting for transaction to complete\n");
 	spfi_reset(spfi);
+
+	return -ETIMEDOUT;
 }
 
 static unsigned int spfi_pio_write32(struct img_spfi *spfi, const u32 *buf,
@@ -237,6 +233,7 @@
 	const void *tx_buf = xfer->tx_buf;
 	void *rx_buf = xfer->rx_buf;
 	unsigned long timeout;
+	int ret;
 
 	if (tx_buf)
 		tx_bytes = xfer->len;
@@ -269,16 +266,15 @@
 		cpu_relax();
 	}
 
+	ret = spfi_wait_all_done(spfi);
+	if (ret < 0)
+		return ret;
+
 	if (rx_bytes > 0 || tx_bytes > 0) {
 		dev_err(spfi->dev, "PIO transfer timed out\n");
-		spfi_reset(spfi);
 		return -ETIMEDOUT;
 	}
 
-	if (tx_buf)
-		spfi_flush_tx_fifo(spfi);
-	spfi_stop(spfi);
-
 	return 0;
 }
 
@@ -287,14 +283,12 @@
 	struct img_spfi *spfi = data;
 	unsigned long flags;
 
+	spfi_wait_all_done(spfi);
+
 	spin_lock_irqsave(&spfi->lock, flags);
-
 	spfi->rx_dma_busy = false;
-	if (!spfi->tx_dma_busy) {
-		spfi_stop(spfi);
+	if (!spfi->tx_dma_busy)
 		spi_finalize_current_transfer(spfi->master);
-	}
-
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -303,16 +297,12 @@
 	struct img_spfi *spfi = data;
 	unsigned long flags;
 
-	spfi_flush_tx_fifo(spfi);
+	spfi_wait_all_done(spfi);
 
 	spin_lock_irqsave(&spfi->lock, flags);
-
 	spfi->tx_dma_busy = false;
-	if (!spfi->rx_dma_busy) {
-		spfi_stop(spfi);
+	if (!spfi->rx_dma_busy)
 		spi_finalize_current_transfer(spfi->master);
-	}
-
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -397,6 +387,75 @@
 	return -EIO;
 }
 
+static void img_spfi_handle_err(struct spi_master *master,
+				struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+	unsigned long flags;
+
+	/*
+	 * Stop all DMA and reset the controller if the previous transaction
+	 * timed-out and never completed it's DMA.
+	 */
+	spin_lock_irqsave(&spfi->lock, flags);
+	if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
+		spfi->tx_dma_busy = false;
+		spfi->rx_dma_busy = false;
+
+		dmaengine_terminate_all(spfi->tx_ch);
+		dmaengine_terminate_all(spfi->rx_ch);
+	}
+	spin_unlock_irqrestore(&spfi->lock, flags);
+}
+
+static int img_spfi_prepare(struct spi_master *master, struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+	u32 val;
+
+	val = spfi_readl(spfi, SPFI_PORT_STATE);
+	if (msg->spi->mode & SPI_CPHA)
+		val |= SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+	else
+		val &= ~SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+	if (msg->spi->mode & SPI_CPOL)
+		val |= SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+	else
+		val &= ~SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+	spfi_writel(spfi, val, SPFI_PORT_STATE);
+
+	return 0;
+}
+
+static int img_spfi_unprepare(struct spi_master *master,
+			      struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+
+	spfi_reset(spfi);
+
+	return 0;
+}
+
+static int img_spfi_setup(struct spi_device *spi)
+{
+	int ret;
+
+	ret = gpio_request_one(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ?
+			       GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH,
+			       dev_name(&spi->dev));
+	if (ret)
+		dev_err(&spi->dev, "can't request chipselect gpio %d\n",
+				spi->cs_gpio);
+
+	return ret;
+}
+
+static void img_spfi_cleanup(struct spi_device *spi)
+{
+	gpio_free(spi->cs_gpio);
+}
+
 static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
@@ -405,10 +464,10 @@
 
 	/*
 	 * output = spfi_clk * (BITCLK / 512), where BITCLK must be a
-	 * power of 2 up to 256 (where 255 == 256 since BITCLK is 8 bits)
+	 * power of 2 up to 128
 	 */
-	div = DIV_ROUND_UP(master->max_speed_hz, xfer->speed_hz);
-	div = clamp(512 / (1 << get_count_order(div)), 1, 255);
+	div = DIV_ROUND_UP(clk_get_rate(spfi->spfi_clk), xfer->speed_hz);
+	div = clamp(512 / (1 << get_count_order(div)), 1, 128);
 
 	val = spfi_readl(spfi, SPFI_DEVICE_PARAMETER(spi->chip_select));
 	val &= ~(SPFI_DEVICE_PARAMETER_BITCLK_MASK <<
@@ -416,6 +475,9 @@
 	val |= div << SPFI_DEVICE_PARAMETER_BITCLK_SHIFT;
 	spfi_writel(spfi, val, SPFI_DEVICE_PARAMETER(spi->chip_select));
 
+	spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
+		    SPFI_TRANSACTION);
+
 	val = spfi_readl(spfi, SPFI_CONTROL);
 	val &= ~(SPFI_CONTROL_SEND_DMA | SPFI_CONTROL_GET_DMA);
 	if (xfer->tx_buf)
@@ -429,25 +491,7 @@
 	else if (xfer->tx_nbits == SPI_NBITS_QUAD &&
 		 xfer->rx_nbits == SPI_NBITS_QUAD)
 		val |= SPFI_CONTROL_TMODE_QUAD << SPFI_CONTROL_TMODE_SHIFT;
-	val &= ~SPFI_CONTROL_CONTINUE;
-	if (!xfer->cs_change && !list_is_last(&xfer->transfer_list,
-					      &master->cur_msg->transfers))
-		val |= SPFI_CONTROL_CONTINUE;
 	spfi_writel(spfi, val, SPFI_CONTROL);
-
-	val = spfi_readl(spfi, SPFI_PORT_STATE);
-	if (spi->mode & SPI_CPHA)
-		val |= SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-	else
-		val &= ~SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-	if (spi->mode & SPI_CPOL)
-		val |= SPFI_PORT_STATE_CK_POL(spi->chip_select);
-	else
-		val &= ~SPFI_PORT_STATE_CK_POL(spi->chip_select);
-	spfi_writel(spfi, val, SPFI_PORT_STATE);
-
-	spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
-		    SPFI_TRANSACTION);
 }
 
 static int img_spfi_transfer_one(struct spi_master *master,
@@ -455,8 +499,6 @@
 				 struct spi_transfer *xfer)
 {
 	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-	bool dma_reset = false;
-	unsigned long flags;
 	int ret;
 
 	if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
@@ -466,23 +508,6 @@
 		return -EINVAL;
 	}
 
-	/*
-	 * Stop all DMA and reset the controller if the previous transaction
-	 * timed-out and never completed it's DMA.
-	 */
-	spin_lock_irqsave(&spfi->lock, flags);
-	if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
-		dev_err(spfi->dev, "SPI DMA still busy\n");
-		dma_reset = true;
-	}
-	spin_unlock_irqrestore(&spfi->lock, flags);
-
-	if (dma_reset) {
-		dmaengine_terminate_all(spfi->tx_ch);
-		dmaengine_terminate_all(spfi->rx_ch);
-		spfi_reset(spfi);
-	}
-
 	img_spfi_config(master, spi, xfer);
 	if (master->can_dma && master->can_dma(master, spi, xfer))
 		ret = img_spfi_start_dma(master, spi, xfer);
@@ -492,17 +517,6 @@
 	return ret;
 }
 
-static void img_spfi_set_cs(struct spi_device *spi, bool enable)
-{
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-	u32 val;
-
-	val = spfi_readl(spfi, SPFI_PORT_STATE);
-	val &= ~(SPFI_PORT_STATE_DEV_SEL_MASK << SPFI_PORT_STATE_DEV_SEL_SHIFT);
-	val |= spi->chip_select << SPFI_PORT_STATE_DEV_SEL_SHIFT;
-	spfi_writel(spfi, val, SPFI_PORT_STATE);
-}
-
 static bool img_spfi_can_dma(struct spi_master *master, struct spi_device *spi,
 			     struct spi_transfer *xfer)
 {
@@ -591,14 +605,17 @@
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL;
 	if (of_property_read_bool(spfi->dev->of_node, "img,supports-quad-mode"))
 		master->mode_bits |= SPI_TX_QUAD | SPI_RX_QUAD;
-	master->num_chipselect = 5;
 	master->dev.of_node = pdev->dev.of_node;
 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(8);
-	master->max_speed_hz = clk_get_rate(spfi->spfi_clk);
-	master->min_speed_hz = master->max_speed_hz / 512;
+	master->max_speed_hz = clk_get_rate(spfi->spfi_clk) / 4;
+	master->min_speed_hz = clk_get_rate(spfi->spfi_clk) / 512;
 
-	master->set_cs = img_spfi_set_cs;
+	master->setup = img_spfi_setup;
+	master->cleanup = img_spfi_cleanup;
 	master->transfer_one = img_spfi_transfer_one;
+	master->prepare_message = img_spfi_prepare;
+	master->unprepare_message = img_spfi_unprepare;
+	master->handle_err = img_spfi_handle_err;
 
 	spfi->tx_ch = dma_request_slave_channel(spfi->dev, "tx");
 	spfi->rx_ch = dma_request_slave_channel(spfi->dev, "rx");
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 6fea4af..f08e812 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -370,8 +370,6 @@
 	if (spi_imx->dma_is_inited) {
 		dma = readl(spi_imx->base + MX51_ECSPI_DMA);
 
-		spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
-		spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
 		spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2;
 		rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET;
 		tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET;
@@ -868,6 +866,8 @@
 	master->max_dma_len = MAX_SDMA_BD_BYTES;
 	spi_imx->bitbang.master->flags = SPI_MASTER_MUST_RX |
 					 SPI_MASTER_MUST_TX;
+	spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
+	spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
 	spi_imx->dma_is_inited = 1;
 
 	return 0;
@@ -903,7 +903,7 @@
 
 	if (tx) {
 		desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
-					tx->sgl, tx->nents, DMA_TO_DEVICE,
+					tx->sgl, tx->nents, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_tx)
 			goto no_dma;
@@ -915,7 +915,7 @@
 
 	if (rx) {
 		desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
-					rx->sgl, rx->nents, DMA_FROM_DEVICE,
+					rx->sgl, rx->nents, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_rx)
 			goto no_dma;
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index ecae0d4..965d2bd 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -588,7 +588,7 @@
 	return mpc512x_psc_spi_do_remove(&op->dev);
 }
 
-static struct of_device_id mpc512x_psc_spi_of_match[] = {
+static const struct of_device_id mpc512x_psc_spi_of_match[] = {
 	{ .compatible = "fsl,mpc5121-psc-spi", },
 	{},
 };
diff --git a/drivers/spi/spi-octeon.c b/drivers/spi/spi-octeon.c
index b283d53..e99d6a9 100644
--- a/drivers/spi/spi-octeon.c
+++ b/drivers/spi/spi-octeon.c
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-static struct of_device_id octeon_spi_match[] = {
+static const struct of_device_id octeon_spi_match[] = {
 	{ .compatible = "cavium,octeon-3010-spi", },
 	{},
 };
diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index d890d30..35b332d 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -24,6 +24,7 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -294,16 +295,6 @@
 	return ret;
 }
 
-static int omap1_spi100k_prepare_hardware(struct spi_master *master)
-{
-	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-	clk_prepare_enable(spi100k->ick);
-	clk_prepare_enable(spi100k->fck);
-
-	return 0;
-}
-
 static int omap1_spi100k_transfer_one_message(struct spi_master *master,
 					      struct spi_message *m)
 {
@@ -372,16 +363,6 @@
 	return status;
 }
 
-static int omap1_spi100k_unprepare_hardware(struct spi_master *master)
-{
-	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-	clk_disable_unprepare(spi100k->ick);
-	clk_disable_unprepare(spi100k->fck);
-
-	return 0;
-}
-
 static int omap1_spi100k_probe(struct platform_device *pdev)
 {
 	struct spi_master       *master;
@@ -402,14 +383,12 @@
 
 	master->setup = omap1_spi100k_setup;
 	master->transfer_one_message = omap1_spi100k_transfer_one_message;
-	master->prepare_transfer_hardware = omap1_spi100k_prepare_hardware;
-	master->unprepare_transfer_hardware = omap1_spi100k_unprepare_hardware;
-	master->cleanup = NULL;
 	master->num_chipselect = 2;
 	master->mode_bits = MODEBITS;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	master->min_speed_hz = OMAP1_SPI100K_MAX_FREQ/(1<<16);
 	master->max_speed_hz = OMAP1_SPI100K_MAX_FREQ;
+	master->auto_runtime_pm = true;
 
 	spi100k = spi_master_get_devdata(master);
 
@@ -434,22 +413,96 @@
 		goto err;
 	}
 
+	status = clk_prepare_enable(spi100k->ick);
+	if (status != 0) {
+		dev_err(&pdev->dev, "failed to enable ick: %d\n", status);
+		goto err;
+	}
+
+	status = clk_prepare_enable(spi100k->fck);
+	if (status != 0) {
+		dev_err(&pdev->dev, "failed to enable fck: %d\n", status);
+		goto err_ick;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+
 	status = devm_spi_register_master(&pdev->dev, master);
 	if (status < 0)
-		goto err;
+		goto err_fck;
 
 	return status;
 
+err_fck:
+	clk_disable_unprepare(spi100k->fck);
+err_ick:
+	clk_disable_unprepare(spi100k->ick);
 err:
 	spi_master_put(master);
 	return status;
 }
 
+static int omap1_spi100k_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+	pm_runtime_disable(&pdev->dev);
+
+	clk_disable_unprepare(spi100k->fck);
+	clk_disable_unprepare(spi100k->ick);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int omap1_spi100k_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+	clk_disable_unprepare(spi100k->ick);
+	clk_disable_unprepare(spi100k->fck);
+
+	return 0;
+}
+
+static int omap1_spi100k_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(spi100k->ick);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable ick: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(spi100k->fck);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable fck: %d\n", ret);
+		clk_disable_unprepare(spi100k->ick);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap1_spi100k_pm = {
+	SET_RUNTIME_PM_OPS(omap1_spi100k_runtime_suspend,
+			   omap1_spi100k_runtime_resume, NULL)
+};
+
 static struct platform_driver omap1_spi100k_driver = {
 	.driver = {
 		.name		= "omap1_spi100k",
+		.pm		= &omap1_spi100k_pm,
 	},
 	.probe		= omap1_spi100k_probe,
+	.remove		= omap1_spi100k_remove,
 };
 
 module_platform_driver(omap1_spi100k_driver);
diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 3c08444..55576db 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
-#include <asm/irq.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index ee513a8..94af806 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -285,7 +285,12 @@
  */
 #define DEFAULT_SSP_REG_IMSC  0x0UL
 #define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC
-#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC)
+#define ENABLE_ALL_INTERRUPTS ( \
+	SSP_IMSC_MASK_RORIM | \
+	SSP_IMSC_MASK_RTIM | \
+	SSP_IMSC_MASK_RXIM | \
+	SSP_IMSC_MASK_TXIM \
+)
 
 #define CLEAR_ALL_INTERRUPTS  0x3
 
@@ -1251,7 +1256,6 @@
 	struct pl022 *pl022 = dev_id;
 	struct spi_message *msg = pl022->cur_msg;
 	u16 irq_status = 0;
-	u16 flag = 0;
 
 	if (unlikely(!msg)) {
 		dev_err(&pl022->adev->dev,
@@ -1280,9 +1284,6 @@
 		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF)
 			dev_err(&pl022->adev->dev,
 				"RXFIFO is full\n");
-		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
-			dev_err(&pl022->adev->dev,
-				"TXFIFO is full\n");
 
 		/*
 		 * Disable and clear interrupts, disable SSP,
@@ -1303,8 +1304,7 @@
 
 	readwriter(pl022);
 
-	if ((pl022->tx == pl022->tx_end) && (flag == 0)) {
-		flag = 1;
+	if (pl022->tx == pl022->tx_end) {
 		/* Disable Transmit interrupt, enable receive interrupt */
 		writew((readw(SSP_IMSC(pl022->virtbase)) &
 		       ~SSP_IMSC_MASK_TXIM) | SSP_IMSC_MASK_RXIM,
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 6f72ad0..e3223ac 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -20,6 +20,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/spi.h>
@@ -30,10 +31,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/acpi.h>
 
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/delay.h>
-
 #include "spi-pxa2xx.h"
 
 MODULE_AUTHOR("Stephen Street");
@@ -67,54 +64,6 @@
 #define LPSS_TX_LOTHRESH_DFLT	160
 #define LPSS_TX_HITHRESH_DFLT	224
 
-struct quark_spi_rate {
-	u32 bitrate;
-	u32 dds_clk_rate;
-	u32 clk_div;
-};
-
-/*
- * 'rate', 'dds', 'clk_div' lookup table, which is defined in
- * the Quark SPI datasheet.
- */
-static const struct quark_spi_rate quark_spi_rate_table[] = {
-/*	bitrate,	dds_clk_rate,	clk_div */
-	{50000000,	0x800000,	0},
-	{40000000,	0x666666,	0},
-	{25000000,	0x400000,	0},
-	{20000000,	0x666666,	1},
-	{16667000,	0x800000,	2},
-	{13333000,	0x666666,	2},
-	{12500000,	0x200000,	0},
-	{10000000,	0x800000,	4},
-	{8000000,	0x666666,	4},
-	{6250000,	0x400000,	3},
-	{5000000,	0x400000,	4},
-	{4000000,	0x666666,	9},
-	{3125000,	0x80000,	0},
-	{2500000,	0x400000,	9},
-	{2000000,	0x666666,	19},
-	{1563000,	0x40000,	0},
-	{1250000,	0x200000,	9},
-	{1000000,	0x400000,	24},
-	{800000,	0x666666,	49},
-	{781250,	0x20000,	0},
-	{625000,	0x200000,	19},
-	{500000,	0x400000,	49},
-	{400000,	0x666666,	99},
-	{390625,	0x10000,	0},
-	{250000,	0x400000,	99},
-	{200000,	0x666666,	199},
-	{195313,	0x8000,		0},
-	{125000,	0x100000,	49},
-	{100000,	0x200000,	124},
-	{50000,		0x100000,	124},
-	{25000,		0x80000,	124},
-	{10016,		0x20000,	77},
-	{5040,		0x20000,	154},
-	{1002,		0x8000,		194},
-};
-
 /* Offset from drv_data->lpss_base */
 #define GENERAL_REG		0x08
 #define GENERAL_REG_RXTO_HOLDOFF_DISABLE BIT(24)
@@ -701,25 +650,124 @@
 }
 
 /*
- * The Quark SPI data sheet gives a table, and for the given 'rate',
- * the 'dds' and 'clk_div' can be found in the table.
+ * The Quark SPI has an additional 24 bit register (DDS_CLK_RATE) to multiply
+ * input frequency by fractions of 2^24. It also has a divider by 5.
+ *
+ * There are formulas to get baud rate value for given input frequency and
+ * divider parameters, such as DDS_CLK_RATE and SCR:
+ *
+ * Fsys = 200MHz
+ *
+ * Fssp = Fsys * DDS_CLK_RATE / 2^24			(1)
+ * Baud rate = Fsclk = Fssp / (2 * (SCR + 1))		(2)
+ *
+ * DDS_CLK_RATE either 2^n or 2^n / 5.
+ * SCR is in range 0 .. 255
+ *
+ * Divisor = 5^i * 2^j * 2 * k
+ *       i = [0, 1]      i = 1 iff j = 0 or j > 3
+ *       j = [0, 23]     j = 0 iff i = 1
+ *       k = [1, 256]
+ * Special case: j = 0, i = 1: Divisor = 2 / 5
+ *
+ * Accordingly to the specification the recommended values for DDS_CLK_RATE
+ * are:
+ *	Case 1:		2^n, n = [0, 23]
+ *	Case 2:		2^24 * 2 / 5 (0x666666)
+ *	Case 3:		less than or equal to 2^24 / 5 / 16 (0x33333)
+ *
+ * In all cases the lowest possible value is better.
+ *
+ * The function calculates parameters for all cases and chooses the one closest
+ * to the asked baud rate.
  */
-static u32 quark_x1000_set_clk_regvals(u32 rate, u32 *dds, u32 *clk_div)
+static unsigned int quark_x1000_get_clk_div(int rate, u32 *dds)
 {
-	unsigned int i;
+	unsigned long xtal = 200000000;
+	unsigned long fref = xtal / 2;		/* mandatory division by 2,
+						   see (2) */
+						/* case 3 */
+	unsigned long fref1 = fref / 2;		/* case 1 */
+	unsigned long fref2 = fref * 2 / 5;	/* case 2 */
+	unsigned long scale;
+	unsigned long q, q1, q2;
+	long r, r1, r2;
+	u32 mul;
 
-	for (i = 0; i < ARRAY_SIZE(quark_spi_rate_table); i++) {
-		if (rate >= quark_spi_rate_table[i].bitrate) {
-			*dds = quark_spi_rate_table[i].dds_clk_rate;
-			*clk_div = quark_spi_rate_table[i].clk_div;
-			return quark_spi_rate_table[i].bitrate;
+	/* Case 1 */
+
+	/* Set initial value for DDS_CLK_RATE */
+	mul = (1 << 24) >> 1;
+
+	/* Calculate initial quot */
+	q1 = DIV_ROUND_CLOSEST(fref1, rate);
+
+	/* Scale q1 if it's too big */
+	if (q1 > 256) {
+		/* Scale q1 to range [1, 512] */
+		scale = fls_long(q1 - 1);
+		if (scale > 9) {
+			q1 >>= scale - 9;
+			mul >>= scale - 9;
+		}
+
+		/* Round the result if we have a remainder */
+		q1 += q1 & 1;
+	}
+
+	/* Decrease DDS_CLK_RATE as much as we can without loss in precision */
+	scale = __ffs(q1);
+	q1 >>= scale;
+	mul >>= scale;
+
+	/* Get the remainder */
+	r1 = abs(fref1 / (1 << (24 - fls_long(mul))) / q1 - rate);
+
+	/* Case 2 */
+
+	q2 = DIV_ROUND_CLOSEST(fref2, rate);
+	r2 = abs(fref2 / q2 - rate);
+
+	/*
+	 * Choose the best between two: less remainder we have the better. We
+	 * can't go case 2 if q2 is greater than 256 since SCR register can
+	 * hold only values 0 .. 255.
+	 */
+	if (r2 >= r1 || q2 > 256) {
+		/* case 1 is better */
+		r = r1;
+		q = q1;
+	} else {
+		/* case 2 is better */
+		r = r2;
+		q = q2;
+		mul = (1 << 24) * 2 / 5;
+	}
+
+	/* Check case 3 only If the divisor is big enough */
+	if (fref / rate >= 80) {
+		u64 fssp;
+		u32 m;
+
+		/* Calculate initial quot */
+		q1 = DIV_ROUND_CLOSEST(fref, rate);
+		m = (1 << 24) / q1;
+
+		/* Get the remainder */
+		fssp = (u64)fref * m;
+		do_div(fssp, 1 << 24);
+		r1 = abs(fssp - rate);
+
+		/* Choose this one if it suits better */
+		if (r1 < r) {
+			/* case 3 is better */
+			q = 1;
+			mul = m;
 		}
 	}
 
-	*dds = quark_spi_rate_table[i-1].dds_clk_rate;
-	*clk_div = quark_spi_rate_table[i-1].clk_div;
-
-	return quark_spi_rate_table[i-1].bitrate;
+	*dds = mul;
+	return q - 1;
 }
 
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
@@ -730,23 +778,25 @@
 	rate = min_t(int, ssp_clk, rate);
 
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
-		return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
+		return (ssp_clk / (2 * rate) - 1) & 0xff;
 	else
-		return ((ssp_clk / rate - 1) & 0xfff) << 8;
+		return (ssp_clk / rate - 1) & 0xfff;
 }
 
 static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 					   struct chip_data *chip, int rate)
 {
-	u32 clk_div;
+	unsigned int clk_div;
 
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
-		quark_x1000_set_clk_regvals(rate, &chip->dds_rate, &clk_div);
-		return clk_div << 8;
+		clk_div = quark_x1000_get_clk_div(rate, &chip->dds_rate);
+		break;
 	default:
-		return ssp_get_clk_div(drv_data, rate);
+		clk_div = ssp_get_clk_div(drv_data, rate);
+		break;
 	}
+	return clk_div << 8;
 }
 
 static void pump_transfers(unsigned long data)
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 2b2c359..810a7fa 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -22,6 +22,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 
 #define QUP_CONFIG			0x0000
 #define QUP_STATE			0x0004
@@ -116,6 +118,8 @@
 
 #define SPI_NUM_CHIPSELECTS		4
 
+#define SPI_MAX_DMA_XFER		(SZ_64K - 64)
+
 /* high speed mode is when bus rate is greater then 26MHz */
 #define SPI_HS_MIN_RATE			26000000
 #define SPI_MAX_RATE			50000000
@@ -140,9 +144,14 @@
 	struct completion	done;
 	int			error;
 	int			w_size;	/* bytes per SPI word */
+	int			n_words;
 	int			tx_bytes;
 	int			rx_bytes;
 	int			qup_v1;
+
+	int			use_dma;
+	struct dma_slave_config	rx_conf;
+	struct dma_slave_config	tx_conf;
 };
 
 
@@ -198,7 +207,6 @@
 	return 0;
 }
 
-
 static void spi_qup_fifo_read(struct spi_qup *controller,
 			    struct spi_transfer *xfer)
 {
@@ -266,6 +274,107 @@
 	}
 }
 
+static void spi_qup_dma_done(void *data)
+{
+	struct spi_qup *qup = data;
+
+	complete(&qup->done);
+}
+
+static int spi_qup_prep_sg(struct spi_master *master, struct spi_transfer *xfer,
+			   enum dma_transfer_direction dir,
+			   dma_async_tx_callback callback)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	unsigned long flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sgl;
+	struct dma_chan *chan;
+	dma_cookie_t cookie;
+	unsigned int nents;
+
+	if (dir == DMA_MEM_TO_DEV) {
+		chan = master->dma_tx;
+		nents = xfer->tx_sg.nents;
+		sgl = xfer->tx_sg.sgl;
+	} else {
+		chan = master->dma_rx;
+		nents = xfer->rx_sg.nents;
+		sgl = xfer->rx_sg.sgl;
+	}
+
+	desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
+	if (!desc)
+		return -EINVAL;
+
+	desc->callback = callback;
+	desc->callback_param = qup;
+
+	cookie = dmaengine_submit(desc);
+
+	return dma_submit_error(cookie);
+}
+
+static void spi_qup_dma_terminate(struct spi_master *master,
+				  struct spi_transfer *xfer)
+{
+	if (xfer->tx_buf)
+		dmaengine_terminate_all(master->dma_tx);
+	if (xfer->rx_buf)
+		dmaengine_terminate_all(master->dma_rx);
+}
+
+static int spi_qup_do_dma(struct spi_master *master, struct spi_transfer *xfer)
+{
+	dma_async_tx_callback rx_done = NULL, tx_done = NULL;
+	int ret;
+
+	if (xfer->rx_buf)
+		rx_done = spi_qup_dma_done;
+	else if (xfer->tx_buf)
+		tx_done = spi_qup_dma_done;
+
+	if (xfer->rx_buf) {
+		ret = spi_qup_prep_sg(master, xfer, DMA_DEV_TO_MEM, rx_done);
+		if (ret)
+			return ret;
+
+		dma_async_issue_pending(master->dma_rx);
+	}
+
+	if (xfer->tx_buf) {
+		ret = spi_qup_prep_sg(master, xfer, DMA_MEM_TO_DEV, tx_done);
+		if (ret)
+			return ret;
+
+		dma_async_issue_pending(master->dma_tx);
+	}
+
+	return 0;
+}
+
+static int spi_qup_do_pio(struct spi_master *master, struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	int ret;
+
+	ret = spi_qup_set_state(qup, QUP_STATE_RUN);
+	if (ret) {
+		dev_warn(qup->dev, "cannot set RUN state\n");
+		return ret;
+	}
+
+	ret = spi_qup_set_state(qup, QUP_STATE_PAUSE);
+	if (ret) {
+		dev_warn(qup->dev, "cannot set PAUSE state\n");
+		return ret;
+	}
+
+	spi_qup_fifo_write(qup, xfer);
+
+	return 0;
+}
+
 static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 {
 	struct spi_qup *controller = dev_id;
@@ -315,11 +424,13 @@
 		error = -EIO;
 	}
 
-	if (opflags & QUP_OP_IN_SERVICE_FLAG)
-		spi_qup_fifo_read(controller, xfer);
+	if (!controller->use_dma) {
+		if (opflags & QUP_OP_IN_SERVICE_FLAG)
+			spi_qup_fifo_read(controller, xfer);
 
-	if (opflags & QUP_OP_OUT_SERVICE_FLAG)
-		spi_qup_fifo_write(controller, xfer);
+		if (opflags & QUP_OP_OUT_SERVICE_FLAG)
+			spi_qup_fifo_write(controller, xfer);
+	}
 
 	spin_lock_irqsave(&controller->lock, flags);
 	controller->error = error;
@@ -332,13 +443,35 @@
 	return IRQ_HANDLED;
 }
 
+static u32
+spi_qup_get_mode(struct spi_master *master, struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	u32 mode;
+
+	qup->w_size = 4;
+
+	if (xfer->bits_per_word <= 8)
+		qup->w_size = 1;
+	else if (xfer->bits_per_word <= 16)
+		qup->w_size = 2;
+
+	qup->n_words = xfer->len / qup->w_size;
+
+	if (qup->n_words <= (qup->in_fifo_sz / sizeof(u32)))
+		mode = QUP_IO_M_MODE_FIFO;
+	else
+		mode = QUP_IO_M_MODE_BLOCK;
+
+	return mode;
+}
 
 /* set clock freq ... bits per word */
 static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct spi_qup *controller = spi_master_get_devdata(spi->master);
 	u32 config, iomode, mode, control;
-	int ret, n_words, w_size;
+	int ret, n_words;
 
 	if (spi->mode & SPI_LOOP && xfer->len > controller->in_fifo_sz) {
 		dev_err(controller->dev, "too big size for loopback %d > %d\n",
@@ -358,35 +491,54 @@
 		return -EIO;
 	}
 
-	w_size = 4;
-	if (xfer->bits_per_word <= 8)
-		w_size = 1;
-	else if (xfer->bits_per_word <= 16)
-		w_size = 2;
+	mode = spi_qup_get_mode(spi->master, xfer);
+	n_words = controller->n_words;
 
-	n_words = xfer->len / w_size;
-	controller->w_size = w_size;
-
-	if (n_words <= (controller->in_fifo_sz / sizeof(u32))) {
-		mode = QUP_IO_M_MODE_FIFO;
+	if (mode == QUP_IO_M_MODE_FIFO) {
 		writel_relaxed(n_words, controller->base + QUP_MX_READ_CNT);
 		writel_relaxed(n_words, controller->base + QUP_MX_WRITE_CNT);
 		/* must be zero for FIFO */
 		writel_relaxed(0, controller->base + QUP_MX_INPUT_CNT);
 		writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
-	} else {
-		mode = QUP_IO_M_MODE_BLOCK;
+	} else if (!controller->use_dma) {
 		writel_relaxed(n_words, controller->base + QUP_MX_INPUT_CNT);
 		writel_relaxed(n_words, controller->base + QUP_MX_OUTPUT_CNT);
 		/* must be zero for BLOCK and BAM */
 		writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
 		writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+	} else {
+		mode = QUP_IO_M_MODE_BAM;
+		writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
+		writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+
+		if (!controller->qup_v1) {
+			void __iomem *input_cnt;
+
+			input_cnt = controller->base + QUP_MX_INPUT_CNT;
+			/*
+			 * for DMA transfers, both QUP_MX_INPUT_CNT and
+			 * QUP_MX_OUTPUT_CNT must be zero to all cases but one.
+			 * That case is a non-balanced transfer when there is
+			 * only a rx_buf.
+			 */
+			if (xfer->tx_buf)
+				writel_relaxed(0, input_cnt);
+			else
+				writel_relaxed(n_words, input_cnt);
+
+			writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
+		}
 	}
 
 	iomode = readl_relaxed(controller->base + QUP_IO_M_MODES);
 	/* Set input and output transfer mode */
 	iomode &= ~(QUP_IO_M_INPUT_MODE_MASK | QUP_IO_M_OUTPUT_MODE_MASK);
-	iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+
+	if (!controller->use_dma)
+		iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+	else
+		iomode |= QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN;
+
 	iomode |= (mode << QUP_IO_M_OUTPUT_MODE_MASK_SHIFT);
 	iomode |= (mode << QUP_IO_M_INPUT_MODE_MASK_SHIFT);
 
@@ -428,11 +580,31 @@
 	config &= ~(QUP_CONFIG_NO_INPUT | QUP_CONFIG_NO_OUTPUT | QUP_CONFIG_N);
 	config |= xfer->bits_per_word - 1;
 	config |= QUP_CONFIG_SPI_MODE;
+
+	if (controller->use_dma) {
+		if (!xfer->tx_buf)
+			config |= QUP_CONFIG_NO_OUTPUT;
+		if (!xfer->rx_buf)
+			config |= QUP_CONFIG_NO_INPUT;
+	}
+
 	writel_relaxed(config, controller->base + QUP_CONFIG);
 
 	/* only write to OPERATIONAL_MASK when register is present */
-	if (!controller->qup_v1)
-		writel_relaxed(0, controller->base + QUP_OPERATIONAL_MASK);
+	if (!controller->qup_v1) {
+		u32 mask = 0;
+
+		/*
+		 * mask INPUT and OUTPUT service flags to prevent IRQs on FIFO
+		 * status change in BAM mode
+		 */
+
+		if (mode == QUP_IO_M_MODE_BAM)
+			mask = QUP_OP_IN_SERVICE_FLAG | QUP_OP_OUT_SERVICE_FLAG;
+
+		writel_relaxed(mask, controller->base + QUP_OPERATIONAL_MASK);
+	}
+
 	return 0;
 }
 
@@ -461,17 +633,13 @@
 	controller->tx_bytes = 0;
 	spin_unlock_irqrestore(&controller->lock, flags);
 
-	if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
-		dev_warn(controller->dev, "cannot set RUN state\n");
-		goto exit;
-	}
+	if (controller->use_dma)
+		ret = spi_qup_do_dma(master, xfer);
+	else
+		ret = spi_qup_do_pio(master, xfer);
 
-	if (spi_qup_set_state(controller, QUP_STATE_PAUSE)) {
-		dev_warn(controller->dev, "cannot set PAUSE state\n");
+	if (ret)
 		goto exit;
-	}
-
-	spi_qup_fifo_write(controller, xfer);
 
 	if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
 		dev_warn(controller->dev, "cannot set EXECUTE state\n");
@@ -480,6 +648,7 @@
 
 	if (!wait_for_completion_timeout(&controller->done, timeout))
 		ret = -ETIMEDOUT;
+
 exit:
 	spi_qup_set_state(controller, QUP_STATE_RESET);
 	spin_lock_irqsave(&controller->lock, flags);
@@ -487,6 +656,97 @@
 	if (!ret)
 		ret = controller->error;
 	spin_unlock_irqrestore(&controller->lock, flags);
+
+	if (ret && controller->use_dma)
+		spi_qup_dma_terminate(master, xfer);
+
+	return ret;
+}
+
+static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
+			    struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	size_t dma_align = dma_get_cache_alignment();
+	u32 mode;
+
+	qup->use_dma = 0;
+
+	if (xfer->rx_buf && (xfer->len % qup->in_blk_sz ||
+	    IS_ERR_OR_NULL(master->dma_rx) ||
+	    !IS_ALIGNED((size_t)xfer->rx_buf, dma_align)))
+		return false;
+
+	if (xfer->tx_buf && (xfer->len % qup->out_blk_sz ||
+	    IS_ERR_OR_NULL(master->dma_tx) ||
+	    !IS_ALIGNED((size_t)xfer->tx_buf, dma_align)))
+		return false;
+
+	mode = spi_qup_get_mode(master, xfer);
+	if (mode == QUP_IO_M_MODE_FIFO)
+		return false;
+
+	qup->use_dma = 1;
+
+	return true;
+}
+
+static void spi_qup_release_dma(struct spi_master *master)
+{
+	if (!IS_ERR_OR_NULL(master->dma_rx))
+		dma_release_channel(master->dma_rx);
+	if (!IS_ERR_OR_NULL(master->dma_tx))
+		dma_release_channel(master->dma_tx);
+}
+
+static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
+{
+	struct spi_qup *spi = spi_master_get_devdata(master);
+	struct dma_slave_config *rx_conf = &spi->rx_conf,
+				*tx_conf = &spi->tx_conf;
+	struct device *dev = spi->dev;
+	int ret;
+
+	/* allocate dma resources, if available */
+	master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	if (IS_ERR(master->dma_rx))
+		return PTR_ERR(master->dma_rx);
+
+	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
+		ret = PTR_ERR(master->dma_tx);
+		goto err_tx;
+	}
+
+	/* set DMA parameters */
+	rx_conf->direction = DMA_DEV_TO_MEM;
+	rx_conf->device_fc = 1;
+	rx_conf->src_addr = base + QUP_INPUT_FIFO;
+	rx_conf->src_maxburst = spi->in_blk_sz;
+
+	tx_conf->direction = DMA_MEM_TO_DEV;
+	tx_conf->device_fc = 1;
+	tx_conf->dst_addr = base + QUP_OUTPUT_FIFO;
+	tx_conf->dst_maxburst = spi->out_blk_sz;
+
+	ret = dmaengine_slave_config(master->dma_rx, rx_conf);
+	if (ret) {
+		dev_err(dev, "failed to configure RX channel\n");
+		goto err;
+	}
+
+	ret = dmaengine_slave_config(master->dma_tx, tx_conf);
+	if (ret) {
+		dev_err(dev, "failed to configure TX channel\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	dma_release_channel(master->dma_tx);
+err_tx:
+	dma_release_channel(master->dma_rx);
 	return ret;
 }
 
@@ -563,6 +823,8 @@
 	master->transfer_one = spi_qup_transfer_one;
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
+	master->dma_alignment = dma_get_cache_alignment();
+	master->max_dma_len = SPI_MAX_DMA_XFER;
 
 	platform_set_drvdata(pdev, master);
 
@@ -574,6 +836,12 @@
 	controller->cclk = cclk;
 	controller->irq = irq;
 
+	ret = spi_qup_init_dma(master, res->start);
+	if (ret == -EPROBE_DEFER)
+		goto error;
+	else if (!ret)
+		master->can_dma = spi_qup_can_dma;
+
 	/* set v1 flag if device is version 1 */
 	if (of_device_is_compatible(dev->of_node, "qcom,spi-qup-v1.1.1"))
 		controller->qup_v1 = 1;
@@ -610,7 +878,7 @@
 	ret = spi_qup_set_state(controller, QUP_STATE_RESET);
 	if (ret) {
 		dev_err(dev, "cannot set RESET state\n");
-		goto error;
+		goto error_dma;
 	}
 
 	writel_relaxed(0, base + QUP_OPERATIONAL);
@@ -634,7 +902,7 @@
 	ret = devm_request_irq(dev, irq, spi_qup_qup_irq,
 			       IRQF_TRIGGER_HIGH, pdev->name, controller);
 	if (ret)
-		goto error;
+		goto error_dma;
 
 	pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
 	pm_runtime_use_autosuspend(dev);
@@ -649,6 +917,8 @@
 
 disable_pm:
 	pm_runtime_disable(&pdev->dev);
+error_dma:
+	spi_qup_release_dma(master);
 error:
 	clk_disable_unprepare(cclk);
 	clk_disable_unprepare(iclk);
@@ -740,6 +1010,8 @@
 	if (ret)
 		return ret;
 
+	spi_qup_release_dma(master);
+
 	clk_disable_unprepare(controller->cclk);
 	clk_disable_unprepare(controller->iclk);
 
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 1a777dc..68e7efe 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -179,6 +179,7 @@
 	u8 tmode;
 	u8 bpw;
 	u8 n_bytes;
+	u8 rsd_nsecs;
 	unsigned len;
 	u32 speed;
 
@@ -302,8 +303,8 @@
 	return 0;
 }
 
-static int rockchip_spi_unprepare_message(struct spi_master *master,
-					  struct spi_message *msg)
+static void rockchip_spi_handle_err(struct spi_master *master,
+				    struct spi_message *msg)
 {
 	unsigned long flags;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
@@ -313,8 +314,8 @@
 	/*
 	 * For DMA mode, we need terminate DMA channel and flush
 	 * fifo for the next transfer if DMA thansfer timeout.
-	 * unprepare_message() was called by core if transfer complete
-	 * or timeout. Maybe it is reasonable for error handling here.
+	 * handle_err() was called by core if transfer failed.
+	 * Maybe it is reasonable for error handling here.
 	 */
 	if (rs->use_dma) {
 		if (rs->state & RXBUSY) {
@@ -327,6 +328,12 @@
 	}
 
 	spin_unlock_irqrestore(&rs->lock, flags);
+}
+
+static int rockchip_spi_unprepare_message(struct spi_master *master,
+					  struct spi_message *msg)
+{
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
 	spi_enable_chip(rs, 0);
 
@@ -493,6 +500,7 @@
 {
 	u32 div = 0;
 	u32 dmacr = 0;
+	int rsd = 0;
 
 	u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
 		| (CR0_SSD_ONE << CR0_SSD_OFFSET);
@@ -519,9 +527,23 @@
 	}
 
 	/* div doesn't support odd number */
-	div = max_t(u32, rs->max_freq / rs->speed, 1);
+	div = DIV_ROUND_UP(rs->max_freq, rs->speed);
 	div = (div + 1) & 0xfffe;
 
+	/* Rx sample delay is expressed in parent clock cycles (max 3) */
+	rsd = DIV_ROUND_CLOSEST(rs->rsd_nsecs * (rs->max_freq >> 8),
+				1000000000 >> 8);
+	if (!rsd && rs->rsd_nsecs) {
+		pr_warn_once("rockchip-spi: %u Hz are too slow to express %u ns delay\n",
+			     rs->max_freq, rs->rsd_nsecs);
+	} else if (rsd > 3) {
+		rsd = 3;
+		pr_warn_once("rockchip-spi: %u Hz are too fast to express %u ns delay, clamping at %u ns\n",
+			     rs->max_freq, rs->rsd_nsecs,
+			     rsd * 1000000000U / rs->max_freq);
+	}
+	cr0 |= rsd << CR0_RSD_OFFSET;
+
 	writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
 
 	writel_relaxed(rs->len - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
@@ -614,6 +636,7 @@
 	struct rockchip_spi *rs;
 	struct spi_master *master;
 	struct resource *mem;
+	u32 rsd_nsecs;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct rockchip_spi));
 	if (!master)
@@ -665,6 +688,10 @@
 	rs->dev = &pdev->dev;
 	rs->max_freq = clk_get_rate(rs->spiclk);
 
+	if (!of_property_read_u32(pdev->dev.of_node, "rx-sample-delay-ns",
+				  &rsd_nsecs))
+		rs->rsd_nsecs = rsd_nsecs;
+
 	rs->fifo_len = get_fifo_len(rs);
 	if (!rs->fifo_len) {
 		dev_err(&pdev->dev, "Failed to get fifo length\n");
@@ -688,6 +715,7 @@
 	master->prepare_message = rockchip_spi_prepare_message;
 	master->unprepare_message = rockchip_spi_unprepare_message;
 	master->transfer_one = rockchip_spi_transfer_one;
+	master->handle_err = rockchip_spi_handle_err;
 
 	rs->dma_tx.ch = dma_request_slave_channel(rs->dev, "tx");
 	if (!rs->dma_tx.ch)
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 46ce470..186924a 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -177,6 +177,13 @@
 #define SPBFCR_RXRST		0x40	/* Receive Buffer Data Reset */
 #define SPBFCR_TXTRG_MASK	0x30	/* Transmit Buffer Data Triggering Number */
 #define SPBFCR_RXTRG_MASK	0x07	/* Receive Buffer Data Triggering Number */
+/* QSPI on R-Car Gen2 */
+#define SPBFCR_TXTRG_1B		0x00	/* 31 bytes (1 byte available) */
+#define SPBFCR_TXTRG_32B	0x30	/* 0 byte (32 bytes available) */
+#define SPBFCR_RXTRG_1B		0x00	/* 1 byte (31 bytes available) */
+#define SPBFCR_RXTRG_32B	0x07	/* 32 bytes (0 byte available) */
+
+#define QSPI_BUFFER_SIZE        32u
 
 struct rspi_data {
 	void __iomem *addr;
@@ -366,6 +373,52 @@
 	return 0;
 }
 
+static void qspi_update(const struct rspi_data *rspi, u8 mask, u8 val, u8 reg)
+{
+	u8 data;
+
+	data = rspi_read8(rspi, reg);
+	data &= ~mask;
+	data |= (val & mask);
+	rspi_write8(rspi, data, reg);
+}
+
+static int qspi_set_send_trigger(struct rspi_data *rspi, unsigned int len)
+{
+	unsigned int n;
+
+	n = min(len, QSPI_BUFFER_SIZE);
+
+	if (len >= QSPI_BUFFER_SIZE) {
+		/* sets triggering number to 32 bytes */
+		qspi_update(rspi, SPBFCR_TXTRG_MASK,
+			     SPBFCR_TXTRG_32B, QSPI_SPBFCR);
+	} else {
+		/* sets triggering number to 1 byte */
+		qspi_update(rspi, SPBFCR_TXTRG_MASK,
+			     SPBFCR_TXTRG_1B, QSPI_SPBFCR);
+	}
+
+	return n;
+}
+
+static void qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
+{
+	unsigned int n;
+
+	n = min(len, QSPI_BUFFER_SIZE);
+
+	if (len >= QSPI_BUFFER_SIZE) {
+		/* sets triggering number to 32 bytes */
+		qspi_update(rspi, SPBFCR_RXTRG_MASK,
+			     SPBFCR_RXTRG_32B, QSPI_SPBFCR);
+	} else {
+		/* sets triggering number to 1 byte */
+		qspi_update(rspi, SPBFCR_RXTRG_MASK,
+			     SPBFCR_RXTRG_1B, QSPI_SPBFCR);
+	}
+}
+
 #define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
 
 static void rspi_enable_irq(const struct rspi_data *rspi, u8 enable)
@@ -609,18 +662,28 @@
 	return __rspi_can_dma(rspi, xfer);
 }
 
+static int rspi_dma_check_then_transfer(struct rspi_data *rspi,
+					 struct spi_transfer *xfer)
+{
+	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
+		/* rx_buf can be NULL on RSPI on SH in TX-only Mode */
+		int ret = rspi_dma_transfer(rspi, &xfer->tx_sg,
+					xfer->rx_buf ? &xfer->rx_sg : NULL);
+		if (ret != -EAGAIN)
+			return 0;
+	}
+
+	return -EAGAIN;
+}
+
 static int rspi_common_transfer(struct rspi_data *rspi,
 				struct spi_transfer *xfer)
 {
 	int ret;
 
-	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
-		/* rx_buf can be NULL on RSPI on SH in TX-only Mode */
-		ret = rspi_dma_transfer(rspi, &xfer->tx_sg,
-					xfer->rx_buf ? &xfer->rx_sg : NULL);
-		if (ret != -EAGAIN)
-			return ret;
-	}
+	ret = rspi_dma_check_then_transfer(rspi, xfer);
+	if (ret != -EAGAIN)
+		return ret;
 
 	ret = rspi_pio_transfer(rspi, xfer->tx_buf, xfer->rx_buf, xfer->len);
 	if (ret < 0)
@@ -661,12 +724,59 @@
 	return rspi_common_transfer(rspi, xfer);
 }
 
+static int qspi_trigger_transfer_out_int(struct rspi_data *rspi, const u8 *tx,
+					u8 *rx, unsigned int len)
+{
+	int i, n, ret;
+	int error;
+
+	while (len > 0) {
+		n = qspi_set_send_trigger(rspi, len);
+		qspi_set_receive_trigger(rspi, len);
+		if (n == QSPI_BUFFER_SIZE) {
+			error = rspi_wait_for_tx_empty(rspi);
+			if (error < 0) {
+				dev_err(&rspi->master->dev, "transmit timeout\n");
+				return error;
+			}
+			for (i = 0; i < n; i++)
+				rspi_write_data(rspi, *tx++);
+
+			error = rspi_wait_for_rx_full(rspi);
+			if (error < 0) {
+				dev_err(&rspi->master->dev, "receive timeout\n");
+				return error;
+			}
+			for (i = 0; i < n; i++)
+				*rx++ = rspi_read_data(rspi);
+		} else {
+			ret = rspi_pio_transfer(rspi, tx, rx, n);
+			if (ret < 0)
+				return ret;
+		}
+		len -= n;
+	}
+
+	return 0;
+}
+
 static int qspi_transfer_out_in(struct rspi_data *rspi,
 				struct spi_transfer *xfer)
 {
+	int ret;
+
 	qspi_receive_init(rspi);
 
-	return rspi_common_transfer(rspi, xfer);
+	ret = rspi_dma_check_then_transfer(rspi, xfer);
+	if (ret != -EAGAIN)
+		return ret;
+
+	ret = qspi_trigger_transfer_out_int(rspi, xfer->tx_buf,
+					    xfer->rx_buf, xfer->len);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 9231c34..b1c6731 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -324,7 +324,7 @@
 
 		/* Acquire DMA channels */
 		sdd->rx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-				   (void *)sdd->rx_dma.dmach, dev, "rx");
+				   (void *)(long)sdd->rx_dma.dmach, dev, "rx");
 		if (!sdd->rx_dma.ch) {
 			dev_err(dev, "Failed to get RX DMA channel\n");
 			ret = -EBUSY;
@@ -333,7 +333,7 @@
 		spi->dma_rx = sdd->rx_dma.ch;
 
 		sdd->tx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-				   (void *)sdd->tx_dma.dmach, dev, "tx");
+				   (void *)(long)sdd->tx_dma.dmach, dev, "tx");
 		if (!sdd->tx_dma.ch) {
 			dev_err(dev, "Failed to get TX DMA channel\n");
 			ret = -EBUSY;
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 5a56acf..36af4d4 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -286,7 +286,7 @@
 			hw->freq = SC18IS602_CLOCK;
 		break;
 	}
-	master->bus_num = client->adapter->nr;
+	master->bus_num = np ? -1 : client->adapter->nr;
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->setup = sc18is602_setup;
diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index 2faeaa7..f17c0ab 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -482,7 +482,7 @@
 	SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL)
 };
 
-static struct of_device_id stm_spi_match[] = {
+static const struct of_device_id stm_spi_match[] = {
 	{ .compatible = "st,comms-ssc4-spi", },
 	{},
 };
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 57a1950..d5d7d22 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -16,7 +16,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kmod.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/cache.h>
@@ -129,125 +128,11 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int spi_legacy_suspend(struct device *dev, pm_message_t message)
-{
-	int			value = 0;
-	struct spi_driver	*drv = to_spi_driver(dev->driver);
-
-	/* suspend will stop irqs and dma; no more i/o */
-	if (drv) {
-		if (drv->suspend)
-			value = drv->suspend(to_spi_device(dev), message);
-		else
-			dev_dbg(dev, "... can't suspend\n");
-	}
-	return value;
-}
-
-static int spi_legacy_resume(struct device *dev)
-{
-	int			value = 0;
-	struct spi_driver	*drv = to_spi_driver(dev->driver);
-
-	/* resume may restart the i/o queue */
-	if (drv) {
-		if (drv->resume)
-			value = drv->resume(to_spi_device(dev));
-		else
-			dev_dbg(dev, "... can't resume\n");
-	}
-	return value;
-}
-
-static int spi_pm_suspend(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_suspend(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_SUSPEND);
-}
-
-static int spi_pm_resume(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_resume(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-
-static int spi_pm_freeze(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_freeze(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_FREEZE);
-}
-
-static int spi_pm_thaw(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_thaw(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-
-static int spi_pm_poweroff(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_poweroff(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_HIBERNATE);
-}
-
-static int spi_pm_restore(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_restore(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-#else
-#define spi_pm_suspend	NULL
-#define spi_pm_resume	NULL
-#define spi_pm_freeze	NULL
-#define spi_pm_thaw	NULL
-#define spi_pm_poweroff	NULL
-#define spi_pm_restore	NULL
-#endif
-
-static const struct dev_pm_ops spi_pm = {
-	.suspend = spi_pm_suspend,
-	.resume = spi_pm_resume,
-	.freeze = spi_pm_freeze,
-	.thaw = spi_pm_thaw,
-	.poweroff = spi_pm_poweroff,
-	.restore = spi_pm_restore,
-	SET_RUNTIME_PM_OPS(
-		pm_generic_runtime_suspend,
-		pm_generic_runtime_resume,
-		NULL
-	)
-};
-
 struct bus_type spi_bus_type = {
 	.name		= "spi",
 	.dev_groups	= spi_dev_groups,
 	.match		= spi_match_device,
 	.uevent		= spi_uevent,
-	.pm		= &spi_pm,
 };
 EXPORT_SYMBOL_GPL(spi_bus_type);
 
@@ -851,6 +736,9 @@
 	if (msg->status == -EINPROGRESS)
 		msg->status = ret;
 
+	if (msg->status && master->handle_err)
+		master->handle_err(master, msg);
+
 	spi_finalize_current_message(master);
 
 	return ret;
@@ -1360,7 +1248,6 @@
 	spi->dev.of_node = nc;
 
 	/* Register the new device */
-	request_module("%s%s", SPI_MODULE_PREFIX, spi->modalias);
 	rc = spi_add_device(spi);
 	if (rc) {
 		dev_err(&master->dev, "spi_device register error %s\n",
@@ -1894,6 +1781,8 @@
 	if (!spi->max_speed_hz)
 		spi->max_speed_hz = spi->master->max_speed_hz;
 
+	spi_set_cs(spi, false);
+
 	if (spi->master->setup)
 		status = spi->master->setup(spi);
 
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 4eb7a98..92c909e 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -223,7 +223,7 @@
 	struct spi_transfer	*k_xfers;
 	struct spi_transfer	*k_tmp;
 	struct spi_ioc_transfer *u_tmp;
-	unsigned		n, total;
+	unsigned		n, total, tx_total, rx_total;
 	u8			*tx_buf, *rx_buf;
 	int			status = -EFAULT;
 
@@ -239,33 +239,52 @@
 	tx_buf = spidev->tx_buffer;
 	rx_buf = spidev->rx_buffer;
 	total = 0;
+	tx_total = 0;
+	rx_total = 0;
 	for (n = n_xfers, k_tmp = k_xfers, u_tmp = u_xfers;
 			n;
 			n--, k_tmp++, u_tmp++) {
 		k_tmp->len = u_tmp->len;
 
 		total += k_tmp->len;
-		if (total > bufsiz) {
+		/* Since the function returns the total length of transfers
+		 * on success, restrict the total to positive int values to
+		 * avoid the return value looking like an error.  Also check
+		 * each transfer length to avoid arithmetic overflow.
+		 */
+		if (total > INT_MAX || k_tmp->len > INT_MAX) {
 			status = -EMSGSIZE;
 			goto done;
 		}
 
 		if (u_tmp->rx_buf) {
+			/* this transfer needs space in RX bounce buffer */
+			rx_total += k_tmp->len;
+			if (rx_total > bufsiz) {
+				status = -EMSGSIZE;
+				goto done;
+			}
 			k_tmp->rx_buf = rx_buf;
 			if (!access_ok(VERIFY_WRITE, (u8 __user *)
 						(uintptr_t) u_tmp->rx_buf,
 						u_tmp->len))
 				goto done;
+			rx_buf += k_tmp->len;
 		}
 		if (u_tmp->tx_buf) {
+			/* this transfer needs space in TX bounce buffer */
+			tx_total += k_tmp->len;
+			if (tx_total > bufsiz) {
+				status = -EMSGSIZE;
+				goto done;
+			}
 			k_tmp->tx_buf = tx_buf;
 			if (copy_from_user(tx_buf, (const u8 __user *)
 						(uintptr_t) u_tmp->tx_buf,
 					u_tmp->len))
 				goto done;
+			tx_buf += k_tmp->len;
 		}
-		tx_buf += k_tmp->len;
-		rx_buf += k_tmp->len;
 
 		k_tmp->cs_change = !!u_tmp->cs_change;
 		k_tmp->tx_nbits = u_tmp->tx_nbits;
@@ -303,8 +322,8 @@
 				status = -EFAULT;
 				goto done;
 			}
+			rx_buf += u_tmp->len;
 		}
-		rx_buf += u_tmp->len;
 	}
 	status = total;
 
@@ -684,6 +703,14 @@
 
 static struct class *spidev_class;
 
+#ifdef CONFIG_OF
+static const struct of_device_id spidev_dt_ids[] = {
+	{ .compatible = "rohm,dh2228fv" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, spidev_dt_ids);
+#endif
+
 /*-------------------------------------------------------------------------*/
 
 static int spidev_probe(struct spi_device *spi)
@@ -692,6 +719,17 @@
 	int			status;
 	unsigned long		minor;
 
+	/*
+	 * spidev should never be referenced in DT without a specific
+	 * compatbile string, it is a Linux implementation thing
+	 * rather than a description of the hardware.
+	 */
+	if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
+		dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
+		WARN_ON(spi->dev.of_node &&
+			!of_match_device(spidev_dt_ids, &spi->dev));
+	}
+
 	/* Allocate driver data */
 	spidev = kzalloc(sizeof(*spidev), GFP_KERNEL);
 	if (!spidev)
@@ -758,13 +796,6 @@
 	return 0;
 }
 
-static const struct of_device_id spidev_dt_ids[] = {
-	{ .compatible = "rohm,dh2228fv" },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, spidev_dt_ids);
-
 static struct spi_driver spidev_spi_driver = {
 	.driver = {
 		.name =		"spidev",
diff --git a/drivers/staging/nvec/nvec_power.c b/drivers/staging/nvec/nvec_power.c
index 6a1459d..30b66c3 100644
--- a/drivers/staging/nvec/nvec_power.c
+++ b/drivers/staging/nvec/nvec_power.c
@@ -82,8 +82,8 @@
 	};
 };
 
-static struct power_supply nvec_bat_psy;
-static struct power_supply nvec_psy;
+static struct power_supply *nvec_bat_psy;
+static struct power_supply *nvec_psy;
 
 static int nvec_power_notifier(struct notifier_block *nb,
 			       unsigned long event_type, void *data)
@@ -98,7 +98,7 @@
 	if (res->sub_type == 0) {
 		if (power->on != res->plu) {
 			power->on = res->plu;
-			power_supply_changed(&nvec_psy);
+			power_supply_changed(nvec_psy);
 		}
 		return NOTIFY_STOP;
 	}
@@ -167,7 +167,7 @@
 		}
 		power->bat_cap = res->plc[1];
 		if (status_changed)
-			power_supply_changed(&nvec_bat_psy);
+			power_supply_changed(nvec_bat_psy);
 		break;
 	case VOLTAGE:
 		power->bat_voltage_now = res->plu * 1000;
@@ -225,7 +225,7 @@
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
 {
-	struct nvec_power *power = dev_get_drvdata(psy->dev->parent);
+	struct nvec_power *power = dev_get_drvdata(psy->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ONLINE:
@@ -241,7 +241,7 @@
 				     enum power_supply_property psp,
 				     union power_supply_propval *val)
 {
-	struct nvec_power *power = dev_get_drvdata(psy->dev->parent);
+	struct nvec_power *power = dev_get_drvdata(psy->dev.parent);
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -323,7 +323,7 @@
 	"battery",
 };
 
-static struct power_supply nvec_bat_psy = {
+static const struct power_supply_desc nvec_bat_psy_desc = {
 	.name = "battery",
 	.type = POWER_SUPPLY_TYPE_BATTERY,
 	.properties = nvec_battery_props,
@@ -331,11 +331,9 @@
 	.get_property = nvec_battery_get_property,
 };
 
-static struct power_supply nvec_psy = {
+static const struct power_supply_desc nvec_psy_desc = {
 	.name = "ac",
 	.type = POWER_SUPPLY_TYPE_MAINS,
-	.supplied_to = nvec_power_supplied_to,
-	.num_supplicants = ARRAY_SIZE(nvec_power_supplied_to),
 	.properties = nvec_power_props,
 	.num_properties = ARRAY_SIZE(nvec_power_props),
 	.get_property = nvec_power_get_property,
@@ -373,9 +371,11 @@
 
 static int nvec_power_probe(struct platform_device *pdev)
 {
-	struct power_supply *psy;
+	struct power_supply **psy;
+	const struct power_supply_desc *psy_desc;
 	struct nvec_power *power;
 	struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent);
+	struct power_supply_config psy_cfg = {};
 
 	power = devm_kzalloc(&pdev->dev, sizeof(struct nvec_power), GFP_NOWAIT);
 	if (power == NULL)
@@ -387,6 +387,9 @@
 	switch (pdev->id) {
 	case AC:
 		psy = &nvec_psy;
+		psy_desc = &nvec_psy_desc;
+		psy_cfg.supplied_to = nvec_power_supplied_to;
+		psy_cfg.num_supplicants = ARRAY_SIZE(nvec_power_supplied_to);
 
 		power->notifier.notifier_call = nvec_power_notifier;
 
@@ -395,6 +398,7 @@
 		break;
 	case BAT:
 		psy = &nvec_bat_psy;
+		psy_desc = &nvec_bat_psy_desc;
 
 		power->notifier.notifier_call = nvec_power_bat_notifier;
 		break;
@@ -407,7 +411,9 @@
 	if (pdev->id == BAT)
 		get_bat_mfg_data(power);
 
-	return power_supply_register(&pdev->dev, psy);
+	*psy = power_supply_register(&pdev->dev, psy_desc, &psy_cfg);
+
+	return PTR_ERR_OR_ZERO(*psy);
 }
 
 static int nvec_power_remove(struct platform_device *pdev)
@@ -418,10 +424,10 @@
 	nvec_unregister_notifier(power->nvec, &power->notifier);
 	switch (pdev->id) {
 	case AC:
-		power_supply_unregister(&nvec_psy);
+		power_supply_unregister(nvec_psy);
 		break;
 	case BAT:
-		power_supply_unregister(&nvec_bat_psy);
+		power_supply_unregister(nvec_bat_psy);
 	}
 
 	return 0;
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 2accb6e..77d6425 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1181,7 +1181,7 @@
 	 * traditional iSCSI block I/O.
 	 */
 	if (iscsit_allocate_iovecs(cmd) < 0) {
-		return iscsit_add_reject_cmd(cmd,
+		return iscsit_reject_cmd(cmd,
 				ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf);
 	}
 	immed_data = cmd->immediate_data;
@@ -3468,6 +3468,7 @@
 						tpg_np_list) {
 				struct iscsi_np *np = tpg_np->tpg_np;
 				bool inaddr_any = iscsit_check_inaddr_any(np);
+				char *fmt_str;
 
 				if (np->np_network_transport != network_transport)
 					continue;
@@ -3495,8 +3496,12 @@
 					}
 				}
 
-				len = sprintf(buf, "TargetAddress="
-					"%s:%hu,%hu",
+				if (np->np_sockaddr.ss_family == AF_INET6)
+					fmt_str = "TargetAddress=[%s]:%hu,%hu";
+				else
+					fmt_str = "TargetAddress=%s:%hu,%hu";
+
+				len = sprintf(buf, fmt_str,
 					inaddr_any ? conn->local_ip : np->np_ip,
 					np->np_port,
 					tpg->tpgt);
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 79b4ec3..7faa6ae 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -781,8 +781,8 @@
 	}
 	if (flag &&
 	    dev->transport->get_write_cache) {
-		pr_err("emulate_fua_write not supported for this device\n");
-		return -EINVAL;
+		pr_warn("emulate_fua_write not supported for this device, ignoring\n");
+		return 0;
 	}
 	if (dev->export_count) {
 		pr_err("emulate_fua_write cannot be changed with active"
diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c
index d1ec580..76c515d 100644
--- a/drivers/thermal/st/st_thermal.c
+++ b/drivers/thermal/st/st_thermal.c
@@ -25,7 +25,7 @@
  * Function to allocate regfields which are common
  * between syscfg and memory mapped based sensors
  */
-int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
+static int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
 {
 	struct device *dev = sensor->dev;
 	struct regmap *regmap = sensor->regmap;
diff --git a/drivers/thermal/st/st_thermal_memmap.c b/drivers/thermal/st/st_thermal_memmap.c
index 067bfcd..fc0c9e1 100644
--- a/drivers/thermal/st/st_thermal_memmap.c
+++ b/drivers/thermal/st/st_thermal_memmap.c
@@ -157,7 +157,7 @@
 };
 
 /* Compatible device data stih416 mpe thermal sensor */
-const struct st_thermal_compat_data st_416mpe_cdata = {
+static const struct st_thermal_compat_data st_416mpe_cdata = {
 	.reg_fields		= st_mmap_thermal_regfields,
 	.ops			= &st_mmap_sensor_ops,
 	.calibration_val	= 14,
@@ -166,7 +166,7 @@
 };
 
 /* Compatible device data stih407 thermal sensor */
-const struct st_thermal_compat_data st_407_cdata = {
+static const struct st_thermal_compat_data st_407_cdata = {
 	.reg_fields		= st_mmap_thermal_regfields,
 	.ops			= &st_mmap_sensor_ops,
 	.calibration_val	= 16,
@@ -174,19 +174,19 @@
 	.crit_temp		= 120,
 };
 
-static struct of_device_id st_mmap_thermal_of_match[] = {
+static const struct of_device_id st_mmap_thermal_of_match[] = {
 	{ .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata },
 	{ .compatible = "st,stih407-thermal",     .data = &st_407_cdata },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match);
 
-int st_mmap_probe(struct platform_device *pdev)
+static int st_mmap_probe(struct platform_device *pdev)
 {
 	return st_thermal_register(pdev,  st_mmap_thermal_of_match);
 }
 
-int st_mmap_remove(struct platform_device *pdev)
+static int st_mmap_remove(struct platform_device *pdev)
 {
 	return st_thermal_unregister(pdev);
 }
diff --git a/drivers/thermal/st/st_thermal_syscfg.c b/drivers/thermal/st/st_thermal_syscfg.c
index 26d36a2..3df5b789 100644
--- a/drivers/thermal/st/st_thermal_syscfg.c
+++ b/drivers/thermal/st/st_thermal_syscfg.c
@@ -104,7 +104,7 @@
 };
 
 /* Compatible device data for stih415 sas thermal sensor */
-const struct st_thermal_compat_data st_415sas_cdata = {
+static const struct st_thermal_compat_data st_415sas_cdata = {
 	.sys_compat		= "st,stih415-front-syscfg",
 	.reg_fields		= st_415sas_regfields,
 	.ops			= &st_syscfg_sensor_ops,
@@ -114,7 +114,7 @@
 };
 
 /* Compatible device data for stih415 mpe thermal sensor */
-const struct st_thermal_compat_data st_415mpe_cdata = {
+static const struct st_thermal_compat_data st_415mpe_cdata = {
 	.sys_compat		= "st,stih415-system-syscfg",
 	.reg_fields		= st_415mpe_regfields,
 	.ops			= &st_syscfg_sensor_ops,
@@ -124,7 +124,7 @@
 };
 
 /* Compatible device data for stih416 sas thermal sensor */
-const struct st_thermal_compat_data st_416sas_cdata = {
+static const struct st_thermal_compat_data st_416sas_cdata = {
 	.sys_compat		= "st,stih416-front-syscfg",
 	.reg_fields		= st_416sas_regfields,
 	.ops			= &st_syscfg_sensor_ops,
@@ -134,7 +134,7 @@
 };
 
 /* Compatible device data for stid127 thermal sensor */
-const struct st_thermal_compat_data st_127_cdata = {
+static const struct st_thermal_compat_data st_127_cdata = {
 	.sys_compat		= "st,stid127-cpu-syscfg",
 	.reg_fields		= st_127_regfields,
 	.ops			= &st_syscfg_sensor_ops,
@@ -143,7 +143,7 @@
 	.crit_temp		= 120,
 };
 
-static struct of_device_id st_syscfg_thermal_of_match[] = {
+static const struct of_device_id st_syscfg_thermal_of_match[] = {
 	{ .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata },
 	{ .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata },
 	{ .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata },
@@ -152,12 +152,12 @@
 };
 MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match);
 
-int st_syscfg_probe(struct platform_device *pdev)
+static int st_syscfg_probe(struct platform_device *pdev)
 {
 	return st_thermal_register(pdev, st_syscfg_thermal_of_match);
 }
 
-int st_syscfg_remove(struct platform_device *pdev)
+static int st_syscfg_remove(struct platform_device *pdev)
 {
 	return st_thermal_unregister(pdev);
 }
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 174d3bc..4108db7 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -458,8 +458,10 @@
 
 	ret = thermal_zone_get_temp(tz, &temp);
 	if (ret) {
-		dev_warn(&tz->device, "failed to read out thermal zone %d\n",
-			 tz->id);
+		if (ret != -EAGAIN)
+			dev_warn(&tz->device,
+				 "failed to read out thermal zone (%d)\n",
+				 ret);
 		return;
 	}
 
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 259a4d5..843f2cd 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -275,6 +275,7 @@
 static void sysrq_handle_showstate(int key)
 {
 	show_state();
+	show_workqueue_state();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
 	.handler	= sysrq_handle_showstate,
diff --git a/drivers/usb/phy/phy-ab8500-usb.c b/drivers/usb/phy/phy-ab8500-usb.c
index 59cccfa..7225d52 100644
--- a/drivers/usb/phy/phy-ab8500-usb.c
+++ b/drivers/usb/phy/phy-ab8500-usb.c
@@ -277,7 +277,7 @@
 			dev_err(ab->dev, "Failed to set the Vintcore to 1.3V, ret=%d\n",
 					ret);
 
-		ret = regulator_set_optimum_mode(ab->v_ulpi, 28000);
+		ret = regulator_set_load(ab->v_ulpi, 28000);
 		if (ret < 0)
 			dev_err(ab->dev, "Failed to set optimum mode (ret=%d)\n",
 					ret);
@@ -317,7 +317,7 @@
 						ab->saved_v_ulpi, ret);
 		}
 
-		ret = regulator_set_optimum_mode(ab->v_ulpi, 0);
+		ret = regulator_set_load(ab->v_ulpi, 0);
 		if (ret < 0)
 			dev_err(ab->dev, "Failed to set optimum mode (ret=%d)\n",
 					ret);
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index b50c45c..c9156be 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -142,27 +142,22 @@
 	int ret = 0;
 
 	if (on) {
-		ret = regulator_set_optimum_mode(motg->v1p8,
-				USB_PHY_1P8_HPM_LOAD);
+		ret = regulator_set_load(motg->v1p8, USB_PHY_1P8_HPM_LOAD);
 		if (ret < 0) {
 			pr_err("Could not set HPM for v1p8\n");
 			return ret;
 		}
-		ret = regulator_set_optimum_mode(motg->v3p3,
-				USB_PHY_3P3_HPM_LOAD);
+		ret = regulator_set_load(motg->v3p3, USB_PHY_3P3_HPM_LOAD);
 		if (ret < 0) {
 			pr_err("Could not set HPM for v3p3\n");
-			regulator_set_optimum_mode(motg->v1p8,
-				USB_PHY_1P8_LPM_LOAD);
+			regulator_set_load(motg->v1p8, USB_PHY_1P8_LPM_LOAD);
 			return ret;
 		}
 	} else {
-		ret = regulator_set_optimum_mode(motg->v1p8,
-				USB_PHY_1P8_LPM_LOAD);
+		ret = regulator_set_load(motg->v1p8, USB_PHY_1P8_LPM_LOAD);
 		if (ret < 0)
 			pr_err("Could not set LPM for v1p8\n");
-		ret = regulator_set_optimum_mode(motg->v3p3,
-				USB_PHY_3P3_LPM_LOAD);
+		ret = regulator_set_load(motg->v3p3, USB_PHY_3P3_LPM_LOAD);
 		if (ret < 0)
 			pr_err("Could not set LPM for v3p3\n");
 	}
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1..a793f70 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -278,11 +278,11 @@
 	return 0;
 }
 
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct kioctx_table *table;
-	int i;
+	int i, res = -EINVAL;
 
 	spin_lock(&mm->ioctx_lock);
 	rcu_read_lock();
@@ -292,13 +292,17 @@
 
 		ctx = table->table[i];
 		if (ctx && ctx->aio_ring_file == file) {
-			ctx->user_id = ctx->mmap_base = vma->vm_start;
+			if (!atomic_read(&ctx->dead)) {
+				ctx->user_id = ctx->mmap_base = vma->vm_start;
+				res = 0;
+			}
 			break;
 		}
 	}
 
 	rcu_read_unlock();
 	spin_unlock(&mm->ioctx_lock);
+	return res;
 }
 
 static const struct file_operations aio_ring_fops = {
@@ -727,6 +731,9 @@
 err_cleanup:
 	aio_nr_sub(ctx->max_reqs);
 err_ctx:
+	atomic_set(&ctx->dead, 1);
+	if (ctx->mmap_size)
+		vm_munmap(ctx->mmap_base, ctx->mmap_size);
 	aio_free_ring(ctx);
 err:
 	mutex_unlock(&ctx->ring_lock);
@@ -748,11 +755,12 @@
 {
 	struct kioctx_table *table;
 
-	if (atomic_xchg(&ctx->dead, 1))
-		return -EINVAL;
-
-
 	spin_lock(&mm->ioctx_lock);
+	if (atomic_xchg(&ctx->dead, 1)) {
+		spin_unlock(&mm->ioctx_lock);
+		return -EINVAL;
+	}
+
 	table = rcu_dereference_raw(mm->ioctx_table);
 	WARN_ON(ctx != table->table[ctx->id]);
 	table->table[ctx->id] = NULL;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46e0d4e..ba1790e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2394,7 +2394,6 @@
 		/*
 		 * for completing the rest of the request.
 		 */
-		*ppos += written;
 		count -= written;
 		written_buffered = generic_perform_write(file, from, *ppos);
 		/*
@@ -2409,7 +2408,6 @@
 			goto out_dio;
 		}
 
-		iocb->ki_pos = *ppos + written_buffered;
 		/* We need to ensure that the page cache pages are written to
 		 * disk and invalidated to preserve the expected O_DIRECT
 		 * semantics.
@@ -2418,6 +2416,7 @@
 		ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
 				endbyte);
 		if (ret == 0) {
+			iocb->ki_pos = *ppos + written_buffered;
 			written += written_buffered;
 			invalidate_mapping_pages(mapping,
 					*ppos >> PAGE_CACHE_SHIFT,
@@ -2440,10 +2439,14 @@
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
+	if (unlikely(written <= 0))
+		goto no_sync;
+
 	if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
 	    ((file->f_flags & O_DIRECT) && !direct_io)) {
-		ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
-					       *ppos + count - 1);
+		ret = filemap_fdatawrite_range(file->f_mapping,
+					       iocb->ki_pos - written,
+					       iocb->ki_pos - 1);
 		if (ret < 0)
 			written = ret;
 
@@ -2454,10 +2457,12 @@
 		}
 
 		if (!ret)
-			ret = filemap_fdatawait_range(file->f_mapping, *ppos,
-						      *ppos + count - 1);
+			ret = filemap_fdatawait_range(file->f_mapping,
+						      iocb->ki_pos - written,
+						      iocb->ki_pos - 1);
 	}
 
+no_sync:
 	/*
 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 	 * function pointer which is called when o_direct io completes so that
diff --git a/include/dt-bindings/interrupt-controller/irq-st.h b/include/dt-bindings/interrupt-controller/irq-st.h
new file mode 100644
index 0000000..4c59ace
--- /dev/null
+++ b/include/dt-bindings/interrupt-controller/irq-st.h
@@ -0,0 +1,30 @@
+/*
+ *  include/linux/irqchip/irq-st.h
+ *
+ *  Copyright (C) 2014 STMicroelectronics – All Rights Reserved
+ *
+ *  Author: Lee Jones <lee.jones@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_ST_H
+#define _DT_BINDINGS_INTERRUPT_CONTROLLER_ST_H
+
+#define ST_IRQ_SYSCFG_EXT_0		0
+#define ST_IRQ_SYSCFG_EXT_1		1
+#define ST_IRQ_SYSCFG_EXT_2		2
+#define ST_IRQ_SYSCFG_CTI_0		3
+#define ST_IRQ_SYSCFG_CTI_1		4
+#define ST_IRQ_SYSCFG_PMU_0		5
+#define ST_IRQ_SYSCFG_PMU_1		6
+#define ST_IRQ_SYSCFG_pl310_L2		7
+#define ST_IRQ_SYSCFG_DISABLED		0xFFFFFFFF
+
+#define ST_IRQ_SYSCFG_EXT_1_INV		0x1
+#define ST_IRQ_SYSCFG_EXT_2_INV		0x2
+#define ST_IRQ_SYSCFG_EXT_3_INV		0x4
+
+#endif
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index b3f45a5..e596675 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -24,17 +24,14 @@
 #include <linux/workqueue.h>
 
 struct arch_timer_kvm {
-#ifdef CONFIG_KVM_ARM_TIMER
 	/* Is the timer enabled */
 	bool			enabled;
 
 	/* Virtual offset */
 	cycle_t			cntvoff;
-#endif
 };
 
 struct arch_timer_cpu {
-#ifdef CONFIG_KVM_ARM_TIMER
 	/* Registers: control register, timer value */
 	u32				cntv_ctl;	/* Saved/restored */
 	cycle_t				cntv_cval;	/* Saved/restored */
@@ -55,10 +52,8 @@
 
 	/* Timer IRQ */
 	const struct kvm_irq_level	*irq;
-#endif
 };
 
-#ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
 void kvm_timer_enable(struct kvm *kvm);
 void kvm_timer_init(struct kvm *kvm);
@@ -72,30 +67,6 @@
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
-#else
-static inline int kvm_timer_hyp_init(void)
-{
-	return 0;
-};
-
-static inline void kvm_timer_enable(struct kvm *kvm) {}
-static inline void kvm_timer_init(struct kvm *kvm) {}
-static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-					const struct kvm_irq_level *irq) {}
-static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-	return 0;
-}
-
-static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-	return 0;
-}
-#endif
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 66203b2..133ea00 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,6 +24,7 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
@@ -140,16 +141,21 @@
 };
 
 struct vgic_vm_ops {
-	bool	(*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
-			       struct kvm_exit_mmio *);
 	bool	(*queue_sgi)(struct kvm_vcpu *, int irq);
 	void	(*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
 	int	(*init_model)(struct kvm *);
 	int	(*map_resources)(struct kvm *, const struct vgic_params *);
 };
 
+struct vgic_io_device {
+	gpa_t addr;
+	int len;
+	const struct vgic_io_range *reg_ranges;
+	struct kvm_vcpu *redist_vcpu;
+	struct kvm_io_device dev;
+};
+
 struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
 	bool			in_kernel;
 	bool			ready;
@@ -197,6 +203,9 @@
 	/* Level-triggered interrupt queued on VCPU interface */
 	struct vgic_bitmap	irq_queued;
 
+	/* Interrupt was active when unqueue from VCPU interface */
+	struct vgic_bitmap	irq_active;
+
 	/* Interrupt priority. Not used yet. */
 	struct vgic_bytemap	irq_priority;
 
@@ -237,8 +246,12 @@
 	/* Bitmap indicating which CPU has something pending */
 	unsigned long		*irq_pending_on_cpu;
 
+	/* Bitmap indicating which CPU has active IRQs */
+	unsigned long		*irq_active_on_cpu;
+
 	struct vgic_vm_ops	vm_ops;
-#endif
+	struct vgic_io_device	dist_iodev;
+	struct vgic_io_device	*redist_iodevs;
 };
 
 struct vgic_v2_cpu_if {
@@ -266,13 +279,18 @@
 };
 
 struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
 	u8		*vgic_irq_lr_map;
 
-	/* Pending interrupts on this VCPU */
+	/* Pending/active/both interrupts on this VCPU */
 	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(	active_percpu, VGIC_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(	pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+
+	/* Pending/active/both shared interrupts, dynamically sized */
 	unsigned long	*pending_shared;
+	unsigned long   *active_shared;
+	unsigned long   *pend_act_shared;
 
 	/* Bitmap of used/free list registers */
 	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
@@ -285,7 +303,6 @@
 		struct vgic_v2_cpu_if	vgic_v2;
 		struct vgic_v3_cpu_if	vgic_v3;
 	};
-#endif
 };
 
 #define LR_EMPTY	0xff
@@ -295,10 +312,7 @@
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
 
-#ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_map_resources(struct kvm *kvm);
@@ -312,8 +326,7 @@
 			bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio);
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
@@ -335,84 +348,4 @@
 }
 #endif
 
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
-{
-	return -ENXIO;
-}
-
-static inline int kvm_vgic_map_resources(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
-{
-	return 0;
-}
-
-static inline void kvm_vgic_destroy(struct kvm *kvm)
-{
-}
-
-static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
-				      unsigned int irq_num, bool level)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
-	return true;
-}
-
-static inline bool vgic_ready(struct kvm *kvm)
-{
-	return true;
-}
-
-static inline int kvm_vgic_get_max_vcpus(void)
-{
-	return KVM_MAX_VCPUS;
-}
-#endif
-
 #endif
diff --git a/virt/kvm/iodev.h b/include/kvm/iodev.h
similarity index 66%
rename from virt/kvm/iodev.h
rename to include/kvm/iodev.h
index 12fd3ca..a6d208b 100644
--- a/virt/kvm/iodev.h
+++ b/include/kvm/iodev.h
@@ -9,17 +9,17 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __KVM_IODEV_H__
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
 
 struct kvm_io_device;
+struct kvm_vcpu;
 
 /**
  * kvm_io_device_ops are called under kvm slots_lock.
@@ -27,11 +27,13 @@
  * or non-zero to have it passed to the next device.
  **/
 struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
+	int (*read)(struct kvm_vcpu *vcpu,
+		    struct kvm_io_device *this,
 		    gpa_t addr,
 		    int len,
 		    void *val);
-	int (*write)(struct kvm_io_device *this,
+	int (*write)(struct kvm_vcpu *vcpu,
+		     struct kvm_io_device *this,
 		     gpa_t addr,
 		     int len,
 		     const void *val);
@@ -49,16 +51,20 @@
 	dev->ops = ops;
 }
 
-static inline int kvm_iodevice_read(struct kvm_io_device *dev,
-				    gpa_t addr, int l, void *v)
+static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu,
+				    struct kvm_io_device *dev, gpa_t addr,
+				    int l, void *v)
 {
-	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
+	return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v)
+				: -EOPNOTSUPP;
 }
 
-static inline int kvm_iodevice_write(struct kvm_io_device *dev,
-				     gpa_t addr, int l, const void *v)
+static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
+				     struct kvm_io_device *dev, gpa_t addr,
+				     int l, const void *v)
 {
-	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
+	return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v)
+				 : -EOPNOTSUPP;
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 1648026..b666b77 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -94,6 +94,8 @@
 	ATA_ID_SECTOR_SIZE	= 106,
 	ATA_ID_WWN		= 108,
 	ATA_ID_LOGICAL_SECTOR_SIZE	= 117,	/* and 118 */
+	ATA_ID_COMMAND_SET_3	= 119,
+	ATA_ID_COMMAND_SET_4	= 120,
 	ATA_ID_LAST_LUN		= 126,
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
@@ -177,7 +179,7 @@
 	ATA_DSC			= (1 << 4),	/* drive seek complete */
 	ATA_DRQ			= (1 << 3),	/* data request i/o */
 	ATA_CORR		= (1 << 2),	/* corrected data error */
-	ATA_IDX			= (1 << 1),	/* index */
+	ATA_SENSE		= (1 << 1),	/* sense code available */
 	ATA_ERR			= (1 << 0),	/* have an error */
 	ATA_SRST		= (1 << 2),	/* software reset */
 	ATA_ICRC		= (1 << 7),	/* interface CRC error */
@@ -382,6 +384,8 @@
 	SATA_SSP		= 0x06,	/* Software Settings Preservation */
 	SATA_DEVSLP		= 0x09,	/* Device Sleep */
 
+	SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */
+
 	/* feature values for SET_MAX */
 	ATA_SET_MAX_ADDR	= 0x00,
 	ATA_SET_MAX_PASSWD	= 0x01,
@@ -525,6 +529,8 @@
 #define ata_id_cdb_intr(id)	(((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
 #define ata_id_has_da(id)	((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
 #define ata_id_has_devslp(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
+#define ata_id_has_ncq_autosense(id) \
+				((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))
 
 static inline bool ata_id_has_hipm(const u16 *id)
 {
@@ -696,6 +702,27 @@
 	return id[ATA_ID_CFS_ENABLE_1] & (1 << 5);
 }
 
+static inline bool ata_id_has_read_log_dma_ext(const u16 *id)
+{
+	if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
+		return false;
+	return id[ATA_ID_COMMAND_SET_3] & (1 << 3);
+}
+
+static inline bool ata_id_has_sense_reporting(const u16 *id)
+{
+	if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
+		return false;
+	return id[ATA_ID_COMMAND_SET_3] & (1 << 6);
+}
+
+static inline bool ata_id_sense_reporting_enabled(const u16 *id)
+{
+	if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
+		return false;
+	return id[ATA_ID_COMMAND_SET_4] & (1 << 6);
+}
+
 /**
  *	ata_id_major_version	-	get ATA level of drive
  *	@id: Identify data
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index c294e3e..a1b25e3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -181,7 +181,9 @@
 	__REQ_ELVPRIV,		/* elevator private data attached */
 	__REQ_FAILED,		/* set if the request failed */
 	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
+	__REQ_PREEMPT,		/* set for "ide_preempt" requests and also
+				   for requests for which the SCSI "quiesce"
+				   state must be ignored. */
 	__REQ_ALLOCED,		/* request came from our alloc pool */
 	__REQ_COPY_USER,	/* contains copies of user pages */
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2e4cb67..96c280b 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -8,33 +8,19 @@
 #ifndef _LINUX_CLOCKCHIPS_H
 #define _LINUX_CLOCKCHIPS_H
 
-/* Clock event notification values */
-enum clock_event_nofitiers {
-	CLOCK_EVT_NOTIFY_ADD,
-	CLOCK_EVT_NOTIFY_BROADCAST_ON,
-	CLOCK_EVT_NOTIFY_BROADCAST_OFF,
-	CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-	CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
-	CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
-	CLOCK_EVT_NOTIFY_SUSPEND,
-	CLOCK_EVT_NOTIFY_RESUME,
-	CLOCK_EVT_NOTIFY_CPU_DYING,
-	CLOCK_EVT_NOTIFY_CPU_DEAD,
-};
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
-
-#include <linux/clocksource.h>
-#include <linux/cpumask.h>
-#include <linux/ktime.h>
-#include <linux/notifier.h>
+# include <linux/clocksource.h>
+# include <linux/cpumask.h>
+# include <linux/ktime.h>
+# include <linux/notifier.h>
 
 struct clock_event_device;
 struct module;
 
-/* Clock event mode commands */
+/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
 enum clock_event_mode {
-	CLOCK_EVT_MODE_UNUSED = 0,
+	CLOCK_EVT_MODE_UNUSED,
 	CLOCK_EVT_MODE_SHUTDOWN,
 	CLOCK_EVT_MODE_PERIODIC,
 	CLOCK_EVT_MODE_ONESHOT,
@@ -42,30 +28,49 @@
 };
 
 /*
+ * Possible states of a clock event device.
+ *
+ * DETACHED:	Device is not used by clockevents core. Initial state or can be
+ *		reached from SHUTDOWN.
+ * SHUTDOWN:	Device is powered-off. Can be reached from PERIODIC or ONESHOT.
+ * PERIODIC:	Device is programmed to generate events periodically. Can be
+ *		reached from DETACHED or SHUTDOWN.
+ * ONESHOT:	Device is programmed to generate event only once. Can be reached
+ *		from DETACHED or SHUTDOWN.
+ */
+enum clock_event_state {
+	CLOCK_EVT_STATE_DETACHED,
+	CLOCK_EVT_STATE_SHUTDOWN,
+	CLOCK_EVT_STATE_PERIODIC,
+	CLOCK_EVT_STATE_ONESHOT,
+};
+
+/*
  * Clock event features
  */
-#define CLOCK_EVT_FEAT_PERIODIC		0x000001
-#define CLOCK_EVT_FEAT_ONESHOT		0x000002
-#define CLOCK_EVT_FEAT_KTIME		0x000004
+# define CLOCK_EVT_FEAT_PERIODIC	0x000001
+# define CLOCK_EVT_FEAT_ONESHOT		0x000002
+# define CLOCK_EVT_FEAT_KTIME		0x000004
+
 /*
- * x86(64) specific misfeatures:
+ * x86(64) specific (mis)features:
  *
  * - Clockevent source stops in C3 State and needs broadcast support.
  * - Local APIC timer is used as a dummy device.
  */
-#define CLOCK_EVT_FEAT_C3STOP		0x000008
-#define CLOCK_EVT_FEAT_DUMMY		0x000010
+# define CLOCK_EVT_FEAT_C3STOP		0x000008
+# define CLOCK_EVT_FEAT_DUMMY		0x000010
 
 /*
  * Core shall set the interrupt affinity dynamically in broadcast mode
  */
-#define CLOCK_EVT_FEAT_DYNIRQ		0x000020
-#define CLOCK_EVT_FEAT_PERCPU		0x000040
+# define CLOCK_EVT_FEAT_DYNIRQ		0x000020
+# define CLOCK_EVT_FEAT_PERCPU		0x000040
 
 /*
  * Clockevent device is based on a hrtimer for broadcast
  */
-#define CLOCK_EVT_FEAT_HRTIMER		0x000080
+# define CLOCK_EVT_FEAT_HRTIMER		0x000080
 
 /**
  * struct clock_event_device - clock event device descriptor
@@ -78,10 +83,15 @@
  * @min_delta_ns:	minimum delta value in ns
  * @mult:		nanosecond to cycles multiplier
  * @shift:		nanoseconds to cycles divisor (power of two)
- * @mode:		operating mode assigned by the management code
+ * @mode:		operating mode, relevant only to ->set_mode(), OBSOLETE
+ * @state:		current state of the device, assigned by the core code
  * @features:		features
  * @retries:		number of forced programming retries
- * @set_mode:		set mode function
+ * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_state_periodic:	switch state to periodic, if !set_mode
+ * @set_state_oneshot:	switch state to oneshot, if !set_mode
+ * @set_state_shutdown:	switch state to shutdown, if !set_mode
+ * @tick_resume:	resume clkevt device, if !set_mode
  * @broadcast:		function to broadcast events
  * @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
  * @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
@@ -95,22 +105,31 @@
  */
 struct clock_event_device {
 	void			(*event_handler)(struct clock_event_device *);
-	int			(*set_next_event)(unsigned long evt,
-						  struct clock_event_device *);
-	int			(*set_next_ktime)(ktime_t expires,
-						  struct clock_event_device *);
+	int			(*set_next_event)(unsigned long evt, struct clock_event_device *);
+	int			(*set_next_ktime)(ktime_t expires, struct clock_event_device *);
 	ktime_t			next_event;
 	u64			max_delta_ns;
 	u64			min_delta_ns;
 	u32			mult;
 	u32			shift;
 	enum clock_event_mode	mode;
+	enum clock_event_state	state;
 	unsigned int		features;
 	unsigned long		retries;
 
+	/*
+	 * State transition callback(s): Only one of the two groups should be
+	 * defined:
+	 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+	 * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+	 */
+	void			(*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
+	int			(*set_state_periodic)(struct clock_event_device *);
+	int			(*set_state_oneshot)(struct clock_event_device *);
+	int			(*set_state_shutdown)(struct clock_event_device *);
+	int			(*tick_resume)(struct clock_event_device *);
+
 	void			(*broadcast)(const struct cpumask *mask);
-	void			(*set_mode)(enum clock_event_mode mode,
-					    struct clock_event_device *);
 	void			(*suspend)(struct clock_event_device *);
 	void			(*resume)(struct clock_event_device *);
 	unsigned long		min_delta_ticks;
@@ -136,18 +155,18 @@
  *
  * factor = (clock_ticks << shift) / nanoseconds
  */
-static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
-				   int shift)
+static inline unsigned long
+div_sc(unsigned long ticks, unsigned long nsec, int shift)
 {
-	uint64_t tmp = ((uint64_t)ticks) << shift;
+	u64 tmp = ((u64)ticks) << shift;
 
 	do_div(tmp, nsec);
+
 	return (unsigned long) tmp;
 }
 
 /* Clock event layer functions */
-extern u64 clockevent_delta2ns(unsigned long latch,
-			       struct clock_event_device *evt);
+extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu);
 
@@ -158,57 +177,42 @@
 
 extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
 
-extern void clockevents_exchange_device(struct clock_event_device *old,
-					struct clock_event_device *new);
-extern void clockevents_set_mode(struct clock_event_device *dev,
-				 enum clock_event_mode mode);
-extern int clockevents_program_event(struct clock_event_device *dev,
-				     ktime_t expires, bool force);
-
-extern void clockevents_handle_noop(struct clock_event_device *dev);
-
 static inline void
 clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 {
-	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC,
-				      freq, minsec);
+	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec);
 }
 
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#  ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
 extern void tick_broadcast(const struct cpumask *mask);
-#else
-#define tick_broadcast	NULL
-#endif
+#  else
+#   define tick_broadcast	NULL
+#  endif
 extern int tick_receive_broadcast(void);
-#endif
+# endif
 
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
-#else
+# else
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
-#endif
+static inline void tick_setup_hrtimer_broadcast(void) { }
+# endif
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern int clockevents_notify(unsigned long reason, void *arg);
-#else
-static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
-#endif
 
-#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
+#else /* !CONFIG_GENERIC_CLOCKEVENTS: */
 
-static inline void clockevents_suspend(void) {}
-static inline void clockevents_resume(void) {}
-
+static inline void clockevents_suspend(void) { }
+static inline void clockevents_resume(void) { }
 static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
+static inline void tick_setup_hrtimer_broadcast(void) { }
 
-#endif
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-#endif
+#endif /* _LINUX_CLOCKCHIPS_H */
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 9c78d15..1355098 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -56,6 +56,7 @@
  * @shift:		cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
  * @maxadj:		maximum adjustment value to mult (~11%)
+ * @max_cycles:		maximum safe cycle value which won't overflow on multiplication
  * @flags:		flags describing special properties
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
-
+	u64 max_cycles;
 	const char *name;
 	struct list_head list;
 	int rating;
@@ -178,7 +179,6 @@
 }
 
 
-extern int clocksource_register(struct clocksource*);
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
@@ -200,7 +200,16 @@
 extern int
 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
 extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+	return __clocksource_register_scale(cs, 1, 0);
+}
 
 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
 {
@@ -212,14 +221,14 @@
 	return __clocksource_register_scale(cs, 1000, khz);
 }
 
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
 {
-	__clocksource_updatefreq_scale(cs, 1, hz);
+	__clocksource_update_freq_scale(cs, 1, hz);
 }
 
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
 {
-	__clocksource_updatefreq_scale(cs, 1000, khz);
+	__clocksource_update_freq_scale(cs, 1000, khz);
 }
 
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 1b45e4a..0e41ca0 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -192,29 +192,16 @@
 
 #include <uapi/linux/types.h>
 
-static __always_inline void data_access_exceeds_word_size(void)
-#ifdef __compiletime_warning
-__compiletime_warning("data access exceeds word size and won't be atomic")
-#endif
-;
-
-static __always_inline void data_access_exceeds_word_size(void)
-{
-}
-
 static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
 {
 	switch (size) {
 	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
 	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
 	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
-#ifdef CONFIG_64BIT
 	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
-#endif
 	default:
 		barrier();
 		__builtin_memcpy((void *)res, (const void *)p, size);
-		data_access_exceeds_word_size();
 		barrier();
 	}
 }
@@ -225,13 +212,10 @@
 	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
 	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
 	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
-#ifdef CONFIG_64BIT
 	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
-#endif
 	default:
 		barrier();
 		__builtin_memcpy((void *)p, (const void *)res, size);
-		data_access_exceeds_word_size();
 		barrier();
 	}
 }
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 306178d..9c5e892 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -77,7 +77,6 @@
 	unsigned int		cpu;
 
 	int			last_residency;
-	int			state_count;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
 	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
 	struct cpuidle_driver_kobj *kobj_driver;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index 022e34f..52456aa 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -14,6 +14,8 @@
 #include <asm/io.h>
 #include <asm/scatterlist.h>
 
+struct device;
+
 struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
 			size_t size, size_t align, size_t allocation);
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index cf7e431..af5be03 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -942,6 +942,7 @@
 #define EFI_64BIT		5	/* Is the firmware 64-bit? */
 #define EFI_PARAVIRT		6	/* Access is via a paravirt interface */
 #define EFI_ARCH_1		7	/* First arch-specific bit */
+#define EFI_DBG			8	/* Print additional debug info at runtime */
 
 #ifdef CONFIG_EFI
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f4131e8..52cc449 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1549,7 +1549,7 @@
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
-	void (*mremap)(struct file *, struct vm_area_struct *);
+	int (*mremap)(struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index cba442e..f4af034 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -9,7 +9,7 @@
 
 
 extern void synchronize_irq(unsigned int irq);
-extern void synchronize_hardirq(unsigned int irq);
+extern bool synchronize_hardirq(unsigned int irq);
 
 #if defined(CONFIG_TINY_RCU)
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index efc7787..f94cf28 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -514,10 +514,10 @@
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	/*
 	 * Power supply information for HID devices which report
-	 * battery strength. power_supply is registered iff
-	 * battery.name is non-NULL.
+	 * battery strength. power_supply was successfully registered if
+	 * battery is non-NULL.
 	 */
-	struct power_supply battery;
+	struct power_supply *battery;
 	__s32 battery_min;
 	__s32 battery_max;
 	__s32 battery_report_type;
diff --git a/include/linux/init.h b/include/linux/init.h
index 2df8e8d..21b6d76 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -253,21 +253,41 @@
  * obs_kernel_param "array" too far apart in .init.setup.
  */
 #define __setup_param(str, unique_id, fn, early)			\
-	static const char __setup_str_##unique_id[] __initconst	\
-		__aligned(1) = str; \
-	static struct obs_kernel_param __setup_##unique_id	\
-		__used __section(.init.setup)			\
-		__attribute__((aligned((sizeof(long)))))	\
+	static const char __setup_str_##unique_id[] __initconst		\
+		__aligned(1) = str; 					\
+	static struct obs_kernel_param __setup_##unique_id		\
+		__used __section(.init.setup)				\
+		__attribute__((aligned((sizeof(long)))))		\
 		= { __setup_str_##unique_id, fn, early }
 
-#define __setup(str, fn)					\
+#define __setup(str, fn)						\
 	__setup_param(str, fn, fn, 0)
 
-/* NOTE: fn is as per module_param, not __setup!  Emits warning if fn
- * returns non-zero. */
-#define early_param(str, fn)					\
+/*
+ * NOTE: fn is as per module_param, not __setup!
+ * Emits warning if fn returns non-zero.
+ */
+#define early_param(str, fn)						\
 	__setup_param(str, fn, fn, 1)
 
+#define early_param_on_off(str_on, str_off, var, config)		\
+									\
+	int var = IS_ENABLED(config);					\
+									\
+	static int __init parse_##var##_on(char *arg)			\
+	{								\
+		var = 1;						\
+		return 0;						\
+	}								\
+	__setup_param(str_on, parse_##var##_on, parse_##var##_on, 1);	\
+									\
+	static int __init parse_##var##_off(char *arg)			\
+	{								\
+		var = 0;						\
+		return 0;						\
+	}								\
+	__setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
 void __init parse_early_options(char *cmdline);
diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h
deleted file mode 100644
index 10496bd..0000000
--- a/include/linux/intel_mid_dma.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  intel_mid_dma.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMA_H__
-#define __INTEL_MID_DMA_H__
-
-#include <linux/dmaengine.h>
-
-#define DMA_PREP_CIRCULAR_LIST		(1 << 10)
-
-/*DMA mode configurations*/
-enum intel_mid_dma_mode {
-	LNW_DMA_PER_TO_MEM = 0, /*periphral to memory configuration*/
-	LNW_DMA_MEM_TO_PER,	/*memory to periphral configuration*/
-	LNW_DMA_MEM_TO_MEM,	/*mem to mem confg (testing only)*/
-};
-
-/*DMA handshaking*/
-enum intel_mid_dma_hs_mode {
-	LNW_DMA_HW_HS = 0,	/*HW Handshaking only*/
-	LNW_DMA_SW_HS = 1,	/*SW Handshaking not recommended*/
-};
-
-/*Burst size configuration*/
-enum intel_mid_dma_msize {
-	LNW_DMA_MSIZE_1 = 0x0,
-	LNW_DMA_MSIZE_4 = 0x1,
-	LNW_DMA_MSIZE_8 = 0x2,
-	LNW_DMA_MSIZE_16 = 0x3,
-	LNW_DMA_MSIZE_32 = 0x4,
-	LNW_DMA_MSIZE_64 = 0x5,
-};
-
-/**
- * struct intel_mid_dma_slave - DMA slave structure
- *
- * @dirn: DMA trf direction
- * @src_width: tx register width
- * @dst_width: rx register width
- * @hs_mode: HW/SW handshaking mode
- * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem)
- * @src_msize: Source DMA burst size
- * @dst_msize: Dst DMA burst size
- * @per_addr: Periphral address
- * @device_instance: DMA peripheral device instance, we can have multiple
- *		peripheral device connected to single DMAC
- */
-struct intel_mid_dma_slave {
-	enum intel_mid_dma_hs_mode	hs_mode;  /*handshaking*/
-	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
-	unsigned int		device_instance; /*0, 1 for periphral instance*/
-	struct dma_slave_config		dma_slave;
-};
-
-#endif /*__INTEL_MID_DMA_H__*/
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 2e88580..950ae45 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -39,8 +39,6 @@
  * These flags used only by the kernel as part of the
  * irq handling routines.
  *
- * IRQF_DISABLED - keep irqs disabled when calling the action handler.
- *                 DEPRECATED. This flag is a NOOP and scheduled to be removed
  * IRQF_SHARED - allow sharing the irq among several devices
  * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur
  * IRQF_TIMER - Flag to mark this interrupt as timer interrupt
@@ -64,7 +62,6 @@
  *                wakeup devices users need to implement wakeup detection in
  *                their interrupt handlers.
  */
-#define IRQF_DISABLED		0x00000020
 #define IRQF_SHARED		0x00000080
 #define IRQF_PROBE_SHARED	0x00000100
 #define __IRQF_TIMER		0x00000200
@@ -191,6 +188,7 @@
 #endif
 
 extern void disable_irq_nosync(unsigned int irq);
+extern bool disable_hardirq(unsigned int irq);
 extern void disable_irq(unsigned int irq);
 extern void disable_percpu_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
@@ -363,6 +361,20 @@
 	return irq_set_irq_wake(irq, 0);
 }
 
+/*
+ * irq_get_irqchip_state/irq_set_irqchip_state specific flags
+ */
+enum irqchip_irq_state {
+	IRQCHIP_STATE_PENDING,		/* Is interrupt pending? */
+	IRQCHIP_STATE_ACTIVE,		/* Is interrupt in progress? */
+	IRQCHIP_STATE_MASKED,		/* Is interrupt masked? */
+	IRQCHIP_STATE_LINE_LEVEL,	/* Is IRQ line high? */
+};
+
+extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+				 bool *state);
+extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+				 bool state);
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
 extern bool force_irqthreads;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d09ec7a..62c6901 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -30,6 +30,7 @@
 struct seq_file;
 struct module;
 struct msi_msg;
+enum irqchip_irq_state;
 
 /*
  * IRQ line status.
@@ -324,6 +325,8 @@
  *				irq_request_resources
  * @irq_compose_msi_msg:	optional to compose message content for MSI
  * @irq_write_msi_msg:	optional to write message content for MSI
+ * @irq_get_irqchip_state:	return the internal state of an interrupt
+ * @irq_set_irqchip_state:	set the internal state of a interrupt
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -363,6 +366,9 @@
 	void		(*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg);
 	void		(*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg);
 
+	int		(*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state);
+	int		(*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state);
+
 	unsigned long	flags;
 };
 
@@ -460,6 +466,7 @@
 extern int irq_chip_set_affinity_parent(struct irq_data *data,
 					const struct cpumask *dest,
 					bool force);
+extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on);
 #endif
 
 /* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index bf3fe71..47b9ebd 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -38,16 +38,17 @@
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 #endif
 
-void irq_work_run(void);
 void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
 #include <asm/irq_work.h>
 
+void irq_work_run(void);
 bool irq_work_needs_cpu(void);
 #else
 static inline bool irq_work_needs_cpu(void) { return false; }
+static inline void irq_work_run(void) { }
 #endif
 
 #endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 71d706d..36ec4ae 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -97,6 +97,7 @@
 
 extern struct irq_chip gic_arch_extn;
 
+void gic_set_irqchip_flags(unsigned long flags);
 void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
 		    u32 offset, struct device_node *);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
@@ -115,11 +116,5 @@
 void gic_migrate_target(unsigned int new_cpu_id);
 unsigned long gic_get_sgir_physaddr(void);
 
-extern const struct irq_domain_ops *gic_routable_irq_domain_ops;
-static inline void __init register_routable_domain_ops
-					(const struct irq_domain_ops *ops)
-{
-	gic_routable_irq_domain_ops = ops;
-}
 #endif /* __ASSEMBLY */
 #endif
diff --git a/include/linux/irqchip/irq-crossbar.h b/include/linux/irqchip/irq-crossbar.h
deleted file mode 100644
index e5537b8..0000000
--- a/include/linux/irqchip/irq-crossbar.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  drivers/irqchip/irq-crossbar.h
- *
- *  Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-int irqcrossbar_init(void);
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index e6a6aac..3ea2e47 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -240,6 +240,8 @@
 extern cycle_t gic_read_compare(void);
 extern void gic_write_compare(cycle_t cnt);
 extern void gic_write_cpu_compare(cycle_t cnt, int cpu);
+extern void gic_start_count(void);
+extern void gic_stop_count(void);
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index d176d65..5dd1272 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -85,7 +85,7 @@
  * The local_irq_*() APIs are equal to the raw_local_irq*()
  * if !TRACE_IRQFLAGS.
  */
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+#ifdef CONFIG_TRACE_IRQFLAGS
 #define local_irq_enable() \
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
@@ -107,22 +107,6 @@
 			raw_local_irq_restore(flags);	\
 		}					\
 	} while (0)
-#define local_save_flags(flags)				\
-	do {						\
-		raw_local_save_flags(flags);		\
-	} while (0)
-
-#define irqs_disabled_flags(flags)			\
-	({						\
-		raw_irqs_disabled_flags(flags);		\
-	})
-
-#define irqs_disabled()					\
-	({						\
-		unsigned long _flags;			\
-		raw_local_save_flags(_flags);		\
-		raw_irqs_disabled_flags(_flags);	\
-	})
 
 #define safe_halt()				\
 	do {					\
@@ -131,7 +115,7 @@
 	} while (0)
 
 
-#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
+#else /* !CONFIG_TRACE_IRQFLAGS */
 
 #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
 #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
@@ -140,11 +124,28 @@
 		raw_local_irq_save(flags);			\
 	} while (0)
 #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
-#define local_save_flags(flags)	do { raw_local_save_flags(flags); } while (0)
-#define irqs_disabled()		(raw_irqs_disabled())
-#define irqs_disabled_flags(flags) (raw_irqs_disabled_flags(flags))
 #define safe_halt()		do { raw_safe_halt(); } while (0)
 
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#define local_save_flags(flags)	raw_local_save_flags(flags)
+
+/*
+ * Some architectures don't define arch_irqs_disabled(), so even if either
+ * definition would be fine we need to use different ones for the time being
+ * to avoid build issues.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+#define irqs_disabled()					\
+	({						\
+		unsigned long _flags;			\
+		raw_local_save_flags(_flags);		\
+		raw_irqs_disabled_flags(_flags);	\
+	})
+#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
+#define irqs_disabled()	raw_irqs_disabled()
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
+#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
+
 #endif
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 98f923b6..f4de473 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -45,6 +45,12 @@
  * same as using STATIC_KEY_INIT_FALSE.
  */
 
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+# define HAVE_JUMP_LABEL
+#endif
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/bug.h>
@@ -55,7 +61,7 @@
 				    "%s used before call to jump_label_init", \
 				    __func__)
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 
 struct static_key {
 	atomic_t enabled;
@@ -66,13 +72,18 @@
 #endif
 };
 
-# include <asm/jump_label.h>
-# define HAVE_JUMP_LABEL
 #else
 struct static_key {
 	atomic_t enabled;
 };
-#endif	/* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
+#endif	/* HAVE_JUMP_LABEL */
+#endif /* __ASSEMBLY__ */
+
+#ifdef HAVE_JUMP_LABEL
+#include <asm/jump_label.h>
+#endif
+
+#ifndef __ASSEMBLY__
 
 enum jump_label_type {
 	JUMP_LABEL_DISABLE = 0,
@@ -203,3 +214,5 @@
 }
 
 #endif	/* _LINUX_JUMP_LABEL_H */
+
+#endif /* __ASSEMBLY__ */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d12b210..82af5d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -165,12 +165,12 @@
 	KVM_NR_BUSES
 };
 
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val);
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-			    int len, const void *val, long cookie);
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
-		    void *val);
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+			    gpa_t addr, int len, const void *val, long cookie);
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -658,7 +658,6 @@
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 void *kvm_kvzalloc(unsigned long size);
-void kvm_kvfree(const void *addr);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
@@ -700,6 +699,20 @@
 #endif
 }
 
+#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
+/*
+ * returns true if the virtual interrupt controller is initialized and
+ * ready to accept virtual IRQ. On some architectures the virtual interrupt
+ * controller is dynamically instantiated and this is not always true.
+ */
+bool kvm_arch_intc_initialized(struct kvm *kvm);
+#else
+static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+	return true;
+}
+#endif
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
@@ -969,11 +982,16 @@
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
-static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
 }
 
+static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
 
 #else
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6b08cc1..8dad4a3 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -231,8 +231,7 @@
 	ATA_FLAG_SW_ACTIVITY	= (1 << 22), /* driver supports sw activity
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
-	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
-	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
+	ATA_FLAG_SAS_HOST	= (1 << 24), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h
index 234c991..67703f2 100644
--- a/include/linux/mfd/abx500/ux500_chargalg.h
+++ b/include/linux/mfd/abx500/ux500_chargalg.h
@@ -9,8 +9,13 @@
 
 #include <linux/power_supply.h>
 
-#define psy_to_ux500_charger(x) container_of((x), \
-		struct ux500_charger, psy)
+/*
+ * Valid only for supplies of type:
+ * - POWER_SUPPLY_TYPE_MAINS,
+ * - POWER_SUPPLY_TYPE_USB,
+ * because only them store as drv_data pointer to struct ux500_charger.
+ */
+#define psy_to_ux500_charger(x) power_supply_get_drvdata(psy)
 
 /* Forward declaration */
 struct ux500_charger;
@@ -35,7 +40,7 @@
  * @power_path		USB power path support
  */
 struct ux500_charger {
-	struct power_supply psy;
+	struct power_supply *psy;
 	struct ux500_charger_ops ops;
 	int max_out_volt;
 	int max_out_curr;
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index f0b6585..09a4ded 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -30,7 +30,7 @@
 #ifndef __LINUX_MFD_MAX77693_H
 #define __LINUX_MFD_MAX77693_H
 
-/* MAX77686 regulator IDs */
+/* MAX77693 regulator IDs */
 enum max77693_regulators {
 	MAX77693_ESAFEOUT1 = 0,
 	MAX77693_ESAFEOUT2,
@@ -38,12 +38,6 @@
 	MAX77693_REG_MAX,
 };
 
-struct max77693_regulator_data {
-	int id;
-	struct regulator_init_data *initdata;
-	struct device_node *of_node;
-};
-
 struct max77693_reg_data {
 	u8 addr;
 	u8 data;
@@ -103,10 +97,6 @@
 /* MAX77693 */
 
 struct max77693_platform_data {
-	/* regulator data */
-	struct max77693_regulator_data *regulators;
-	int num_regulators;
-
 	/* muic data */
 	struct max77693_muic_platform_data *muic_data;
 	struct max77693_led_platform_data *led_data;
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index ee7b1ce..bb270bd 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -117,6 +117,7 @@
 	int ldo_begin;
 	int ldo_end;
 	int max_reg;
+	bool has_regen3;
 	struct palmas_regs_info *palmas_regs_info;
 	struct of_regulator_match *palmas_matches;
 	struct palmas_sleep_requestor_info *sleep_req_info;
diff --git a/include/linux/mfd/rt5033.h b/include/linux/mfd/rt5033.h
index 010cff4..6cff5cf 100644
--- a/include/linux/mfd/rt5033.h
+++ b/include/linux/mfd/rt5033.h
@@ -39,7 +39,7 @@
 	struct i2c_client	*client;
 	struct rt5033_dev	*rt5033;
 	struct regmap		*regmap;
-	struct power_supply	psy;
+	struct power_supply	*psy;
 };
 
 /* RT5033 charger platform data */
diff --git a/include/linux/mfd/stw481x.h b/include/linux/mfd/stw481x.h
index eda1215..833074b 100644
--- a/include/linux/mfd/stw481x.h
+++ b/include/linux/mfd/stw481x.h
@@ -41,15 +41,11 @@
 
 /**
  * struct stw481x - state holder for the Stw481x drivers
- * @mutex: mutex to serialize I2C accesses
  * @i2c_client: corresponding I2C client
- * @regulator: regulator device for regulator children
  * @map: regmap handle to access device registers
  */
 struct stw481x {
-	struct mutex		lock;
 	struct i2c_client	*client;
-	struct regulator_dev	*vmmc_regulator;
 	struct regmap		*map;
 };
 
diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h
index 2b94793..8dc93673 100644
--- a/include/linux/mfd/wm8350/supply.h
+++ b/include/linux/mfd/wm8350/supply.h
@@ -123,9 +123,9 @@
 
 struct wm8350_power {
 	struct platform_device *pdev;
-	struct power_supply battery;
-	struct power_supply usb;
-	struct power_supply ac;
+	struct power_supply *battery;
+	struct power_supply *usb;
+	struct power_supply *ac;
 	struct wm8350_charger_policy *policy;
 
 	int rev_g_coeff;
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 160448f..de722d4e 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -79,7 +79,7 @@
 #define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_MASK)
 
 	unsigned int		retries;	/* max number of retries */
-	unsigned int		error;		/* command error */
+	int			error;		/* command error */
 
 /*
  * Standard errno values are used for errors, but some have specific
@@ -108,7 +108,7 @@
 	unsigned int		timeout_clks;	/* data timeout (in clocks) */
 	unsigned int		blksz;		/* data block size */
 	unsigned int		blocks;		/* number of blocks */
-	unsigned int		error;		/* data error */
+	int			error;		/* data error */
 	unsigned int		flags;
 
 #define MMC_DATA_WRITE	(1 << 8)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 471fb31..1211199 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -44,6 +44,7 @@
  * struct dw_mci - MMC controller state shared between all slots
  * @lock: Spinlock protecting the queue and associated data.
  * @regs: Pointer to MMIO registers.
+ * @fifo_reg: Pointer to MMIO registers for data FIFO
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
  * @sg_miter: PIO mapping scatterlist iterator.
  * @cur_slot: The slot which is currently using the controller.
@@ -79,7 +80,6 @@
  * @current_speed: Configured rate of the controller.
  * @num_slots: Number of slots available.
  * @verid: Denote Version ID.
- * @data_offset: Set the offset of DATA register according to VERID.
  * @dev: Device associated with the MMC controller.
  * @pdata: Platform data associated with the MMC controller.
  * @drv_data: Driver specific data for identified variant of the controller
@@ -132,6 +132,7 @@
 	spinlock_t		lock;
 	spinlock_t		irq_lock;
 	void __iomem		*regs;
+	void __iomem		*fifo_reg;
 
 	struct scatterlist	*sg;
 	struct sg_mapping_iter	sg_miter;
@@ -172,7 +173,6 @@
 	u32			num_slots;
 	u32			fifoth_val;
 	u16			verid;
-	u16			data_offset;
 	struct device		*dev;
 	struct dw_mci_board	*pdata;
 	const struct dw_mci_drv_data	*drv_data;
@@ -202,6 +202,8 @@
 	int			irq;
 
 	int			sdio_id0;
+
+	struct timer_list       cmd11_timer;
 };
 
 /* DMA ops for Internal/External DMAC interface */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0c8cbe5..b5bedae 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -80,12 +80,6 @@
 
 struct mmc_host_ops {
 	/*
-	 * 'enable' is called when the host is claimed and 'disable' is called
-	 * when the host is released. 'enable' and 'disable' are deprecated.
-	 */
-	int (*enable)(struct mmc_host *host);
-	int (*disable)(struct mmc_host *host);
-	/*
 	 * It is optional for the host to implement pre_req and post_req in
 	 * order to support double buffering of requests (prepare one
 	 * request while another request is active).
diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
deleted file mode 100644
index 8cc095a..0000000
--- a/include/linux/mmc/sdhci-spear.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * include/linux/mmc/sdhci-spear.h
- *
- * SDHCI declarations specific to ST SPEAr platform
- *
- * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef LINUX_MMC_SDHCI_SPEAR_H
-#define LINUX_MMC_SDHCI_SPEAR_H
-
-#include <linux/platform_device.h>
-/*
- * struct sdhci_plat_data: spear sdhci platform data structure
- *
- * card_int_gpio: gpio pin used for card detection
- */
-struct sdhci_plat_data {
-	int card_int_gpio;
-};
-
-/* This function is used to set platform_data field of pdev->dev */
-static inline void
-sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data)
-{
-	pdev->dev.platform_data = data;
-}
-
-#endif /* LINUX_MMC_SDHCI_SPEAR_H */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
deleted file mode 100644
index c3e3db1..0000000
--- a/include/linux/mmc/sdhci.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- *  linux/include/linux/mmc/sdhci.h - Secure Digital Host Controller Interface
- *
- *  Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-#ifndef LINUX_MMC_SDHCI_H
-#define LINUX_MMC_SDHCI_H
-
-#include <linux/scatterlist.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/mmc/host.h>
-
-struct sdhci_host_next {
-	unsigned int	sg_count;
-	s32		cookie;
-};
-
-struct sdhci_host {
-	/* Data set by hardware interface driver */
-	const char *hw_name;	/* Hardware bus name */
-
-	unsigned int quirks;	/* Deviations from spec. */
-
-/* Controller doesn't honor resets unless we touch the clock register */
-#define SDHCI_QUIRK_CLOCK_BEFORE_RESET			(1<<0)
-/* Controller has bad caps bits, but really supports DMA */
-#define SDHCI_QUIRK_FORCE_DMA				(1<<1)
-/* Controller doesn't like to be reset when there is no card inserted. */
-#define SDHCI_QUIRK_NO_CARD_NO_RESET			(1<<2)
-/* Controller doesn't like clearing the power reg before a change */
-#define SDHCI_QUIRK_SINGLE_POWER_WRITE			(1<<3)
-/* Controller has flaky internal state so reset it on each ios change */
-#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS		(1<<4)
-/* Controller has an unusable DMA engine */
-#define SDHCI_QUIRK_BROKEN_DMA				(1<<5)
-/* Controller has an unusable ADMA engine */
-#define SDHCI_QUIRK_BROKEN_ADMA				(1<<6)
-/* Controller can only DMA from 32-bit aligned addresses */
-#define SDHCI_QUIRK_32BIT_DMA_ADDR			(1<<7)
-/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
-#define SDHCI_QUIRK_32BIT_DMA_SIZE			(1<<8)
-/* Controller can only ADMA chunks that are a multiple of 32 bits */
-#define SDHCI_QUIRK_32BIT_ADMA_SIZE			(1<<9)
-/* Controller needs to be reset after each request to stay stable */
-#define SDHCI_QUIRK_RESET_AFTER_REQUEST			(1<<10)
-/* Controller needs voltage and power writes to happen separately */
-#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER		(1<<11)
-/* Controller provides an incorrect timeout value for transfers */
-#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL			(1<<12)
-/* Controller has an issue with buffer bits for small transfers */
-#define SDHCI_QUIRK_BROKEN_SMALL_PIO			(1<<13)
-/* Controller does not provide transfer-complete interrupt when not busy */
-#define SDHCI_QUIRK_NO_BUSY_IRQ				(1<<14)
-/* Controller has unreliable card detection */
-#define SDHCI_QUIRK_BROKEN_CARD_DETECTION		(1<<15)
-/* Controller reports inverted write-protect state */
-#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT		(1<<16)
-/* Controller does not like fast PIO transfers */
-#define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
-/* Controller has to be forced to use block size of 2048 bytes */
-#define SDHCI_QUIRK_FORCE_BLK_SZ_2048			(1<<20)
-/* Controller cannot do multi-block transfers */
-#define SDHCI_QUIRK_NO_MULTIBLOCK			(1<<21)
-/* Controller can only handle 1-bit data transfers */
-#define SDHCI_QUIRK_FORCE_1_BIT_DATA			(1<<22)
-/* Controller needs 10ms delay between applying power and clock */
-#define SDHCI_QUIRK_DELAY_AFTER_POWER			(1<<23)
-/* Controller uses SDCLK instead of TMCLK for data timeouts */
-#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK		(1<<24)
-/* Controller reports wrong base clock capability */
-#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN		(1<<25)
-/* Controller cannot support End Attribute in NOP ADMA descriptor */
-#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC		(1<<26)
-/* Controller is missing device caps. Use caps provided by host */
-#define SDHCI_QUIRK_MISSING_CAPS			(1<<27)
-/* Controller uses Auto CMD12 command to stop the transfer */
-#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12		(1<<28)
-/* Controller doesn't have HISPD bit field in HI-SPEED SD card */
-#define SDHCI_QUIRK_NO_HISPD_BIT			(1<<29)
-/* Controller treats ADMA descriptors with length 0000h incorrectly */
-#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC		(1<<30)
-/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */
-#define SDHCI_QUIRK_UNSTABLE_RO_DETECT			(1<<31)
-
-	unsigned int quirks2;	/* More deviations from spec. */
-
-#define SDHCI_QUIRK2_HOST_OFF_CARD_ON			(1<<0)
-#define SDHCI_QUIRK2_HOST_NO_CMD23			(1<<1)
-/* The system physically doesn't support 1.8v, even if the host does */
-#define SDHCI_QUIRK2_NO_1_8_V				(1<<2)
-#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN		(1<<3)
-#define SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON		(1<<4)
-/* Controller has a non-standard host control register */
-#define SDHCI_QUIRK2_BROKEN_HOST_CONTROL		(1<<5)
-/* Controller does not support HS200 */
-#define SDHCI_QUIRK2_BROKEN_HS200			(1<<6)
-/* Controller does not support DDR50 */
-#define SDHCI_QUIRK2_BROKEN_DDR50			(1<<7)
-/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */
-#define SDHCI_QUIRK2_STOP_WITH_TC			(1<<8)
-/* Controller does not support 64-bit DMA */
-#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA			(1<<9)
-/* need clear transfer mode register before send cmd */
-#define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD	(1<<10)
-/* Capability register bit-63 indicates HS400 support */
-#define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400		(1<<11)
-/* forced tuned clock */
-#define SDHCI_QUIRK2_TUNING_WORK_AROUND			(1<<12)
-/* disable the block count for single block transactions */
-#define SDHCI_QUIRK2_SUPPORT_SINGLE			(1<<13)
-
-	int irq;		/* Device IRQ */
-	void __iomem *ioaddr;	/* Mapped address */
-
-	const struct sdhci_ops *ops;	/* Low level hw interface */
-
-	/* Internal data */
-	struct mmc_host *mmc;	/* MMC structure */
-	u64 dma_mask;		/* custom DMA mask */
-
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
-	struct led_classdev led;	/* LED control */
-	char led_name[32];
-#endif
-
-	spinlock_t lock;	/* Mutex */
-
-	int flags;		/* Host attributes */
-#define SDHCI_USE_SDMA		(1<<0)	/* Host is SDMA capable */
-#define SDHCI_USE_ADMA		(1<<1)	/* Host is ADMA capable */
-#define SDHCI_REQ_USE_DMA	(1<<2)	/* Use DMA for this req. */
-#define SDHCI_DEVICE_DEAD	(1<<3)	/* Device unresponsive */
-#define SDHCI_SDR50_NEEDS_TUNING (1<<4)	/* SDR50 needs tuning */
-#define SDHCI_NEEDS_RETUNING	(1<<5)	/* Host needs retuning */
-#define SDHCI_AUTO_CMD12	(1<<6)	/* Auto CMD12 support */
-#define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
-#define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
-#define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
-#define SDHCI_SDR104_NEEDS_TUNING (1<<10)	/* SDR104/HS200 needs tuning */
-#define SDHCI_USING_RETUNING_TIMER (1<<11)	/* Host is using a retuning timer for the card */
-#define SDHCI_USE_64_BIT_DMA	(1<<12)	/* Use 64-bit DMA */
-#define SDHCI_HS400_TUNING	(1<<13)	/* Tuning for HS400 */
-
-	unsigned int version;	/* SDHCI spec. version */
-
-	unsigned int max_clk;	/* Max possible freq (MHz) */
-	unsigned int timeout_clk;	/* Timeout freq (KHz) */
-	unsigned int clk_mul;	/* Clock Muliplier value */
-
-	unsigned int clock;	/* Current clock (MHz) */
-	u8 pwr;			/* Current voltage */
-
-	bool runtime_suspended;	/* Host is runtime suspended */
-	bool bus_on;		/* Bus power prevents runtime suspend */
-	bool preset_enabled;	/* Preset is enabled */
-
-	struct mmc_request *mrq;	/* Current request */
-	struct mmc_command *cmd;	/* Current command */
-	struct mmc_data *data;	/* Current data request */
-	unsigned int data_early:1;	/* Data finished before cmd */
-	unsigned int busy_handle:1;	/* Handling the order of Busy-end */
-
-	struct sg_mapping_iter sg_miter;	/* SG state for PIO */
-	unsigned int blocks;	/* remaining PIO blocks */
-
-	int sg_count;		/* Mapped sg entries */
-
-	void *adma_table;	/* ADMA descriptor table */
-	void *align_buffer;	/* Bounce buffer */
-
-	size_t adma_table_sz;	/* ADMA descriptor table size */
-	size_t align_buffer_sz;	/* Bounce buffer size */
-
-	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
-	dma_addr_t align_addr;	/* Mapped bounce buffer */
-
-	unsigned int desc_sz;	/* ADMA descriptor size */
-	unsigned int align_sz;	/* ADMA alignment */
-	unsigned int align_mask;	/* ADMA alignment mask */
-
-	struct tasklet_struct finish_tasklet;	/* Tasklet structures */
-
-	struct timer_list timer;	/* Timer for timeouts */
-
-	u32 caps;		/* Alternative CAPABILITY_0 */
-	u32 caps1;		/* Alternative CAPABILITY_1 */
-
-	unsigned int            ocr_avail_sdio;	/* OCR bit masks */
-	unsigned int            ocr_avail_sd;
-	unsigned int            ocr_avail_mmc;
-	u32 ocr_mask;		/* available voltages */
-
-	unsigned		timing;		/* Current timing */
-
-	u32			thread_isr;
-
-	/* cached registers */
-	u32			ier;
-
-	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
-	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
-
-	unsigned int		tuning_count;	/* Timer count for re-tuning */
-	unsigned int		tuning_mode;	/* Re-tuning mode supported by host */
-#define SDHCI_TUNING_MODE_1	0
-	struct timer_list	tuning_timer;	/* Timer for tuning */
-
-	struct sdhci_host_next	next_data;
-	unsigned long private[0] ____cacheline_aligned;
-};
-#endif /* LINUX_MMC_SDHCI_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f279d9c..2782df4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -474,16 +474,15 @@
 	unsigned long		wait_table_bits;
 
 	ZONE_PADDING(_pad1_)
-
-	/* Write-intensive fields used from the page allocator */
-	spinlock_t		lock;
-
 	/* free areas of different sizes */
 	struct free_area	free_area[MAX_ORDER];
 
 	/* zone flags, see below */
 	unsigned long		flags;
 
+	/* Write-intensive fields used from the page allocator */
+	spinlock_t		lock;
+
 	ZONE_PADDING(_pad2_)
 
 	/* Write-intensive fields used by page reclaim */
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index ef37021..22801b1 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -53,6 +53,7 @@
 	return of_node_get(cpu_dev->of_node);
 }
 
+void of_dma_configure(struct device *dev, struct device_node *np);
 #else /* CONFIG_OF */
 
 static inline int of_driver_match_device(struct device *dev,
@@ -90,6 +91,8 @@
 {
 	return NULL;
 }
+static inline void of_dma_configure(struct device *dev, struct device_node *np)
+{}
 #endif /* CONFIG_OF */
 
 #endif /* _LINUX_OF_DEVICE_H */
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 16c7554..ffbe470 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -12,7 +12,8 @@
 			     size_t *size);
 
 extern void of_iommu_init(void);
-extern struct iommu_ops *of_iommu_configure(struct device *dev);
+extern struct iommu_ops *of_iommu_configure(struct device *dev,
+					struct device_node *master_np);
 
 #else
 
@@ -24,7 +25,8 @@
 }
 
 static inline void of_iommu_init(void) { }
-static inline struct iommu_ops *of_iommu_configure(struct device *dev)
+static inline struct iommu_ops *of_iommu_configure(struct device *dev,
+					 struct device_node *master_np)
 {
 	return NULL;
 }
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index ce0e5ab..29fd3fe 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -16,6 +16,7 @@
 int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
+void of_pci_dma_configure(struct pci_dev *pci_dev);
 #else
 static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
@@ -50,6 +51,8 @@
 {
 	return -1;
 }
+
+static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { }
 #endif
 
 #if defined(CONFIG_OF_ADDRESS)
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 24c7728..a965efa 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -77,6 +77,11 @@
 static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
 #endif
 
+extern const u8 pci_acpi_dsm_uuid[];
+#define DEVICE_LABEL_DSM	0x07
+#define RESET_DELAY_DSM		0x08
+#define FUNCTION_DELAY_DSM	0x09
+
 #else	/* CONFIG_ACPI */
 static inline void acpi_pci_add_bus(struct pci_bus *bus) { }
 static inline void acpi_pci_remove_bus(struct pci_bus *bus) { }
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 8af4610..207c561 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -29,7 +29,6 @@
 void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 void pci_disable_link_state(struct pci_dev *pdev, int state);
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-void pcie_clear_aspm(struct pci_bus *bus);
 void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,9 +46,6 @@
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(struct pci_bus *bus)
-{
-}
 static inline void pcie_no_aspm(void)
 {
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 211e9da..e63112f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -406,6 +406,7 @@
 	struct list_head windows;	/* resource_entry */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
+	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
 };
 
 #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
@@ -510,6 +511,9 @@
 	return dev->bus->self;
 }
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
+void pci_put_host_bridge_device(struct device *dev);
+
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h
index 67bbcf0..8e981be 100644
--- a/include/linux/platform_data/hsmmc-omap.h
+++ b/include/linux/platform_data/hsmmc-omap.h
@@ -55,9 +55,6 @@
 	u32 caps;	/* Used for the MMC driver on 2430 and later */
 	u32 pm_caps;	/* PM capabilities of the mmc */
 
-	/* switch pin can be for card detect (default) or card cover */
-	unsigned cover:1;
-
 	/* use the internal clock */
 	unsigned internal_clock:1;
 
@@ -73,7 +70,8 @@
 #define HSMMC_HAS_HSPE_SUPPORT	(1 << 2)
 	unsigned features;
 
-	int switch_pin;			/* gpio (card detect) */
+	int gpio_cd;			/* gpio (card detect) */
+	int gpio_cod;			/* gpio (cover detect) */
 	int gpio_wp;			/* gpio (write protect) */
 
 	int (*set_power)(struct device *dev, int power_on, int vdd);
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 416ebeb..eadf28c 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -242,7 +242,8 @@
 	int emergency_stop;
 
 	char psy_name_buf[PSY_NAME_MAX + 1];
-	struct power_supply charger_psy;
+	struct power_supply_desc charger_psy_desc;
+	struct power_supply *charger_psy;
 
 	u64 charging_start_time;
 	u64 charging_end_time;
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index 89dd84f..cf112b4 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -126,7 +126,14 @@
 	MAX17047_QRTbl30	= 0x42,
 };
 
-enum max170xx_chip_type {MAX17042, MAX17047};
+enum max170xx_chip_type {
+	MAXIM_DEVICE_TYPE_UNKNOWN	= 0,
+	MAXIM_DEVICE_TYPE_MAX17042,
+	MAXIM_DEVICE_TYPE_MAX17047,
+	MAXIM_DEVICE_TYPE_MAX17050,
+
+	MAXIM_DEVICE_TYPE_NUM
+};
 
 /*
  * used for setting a register to a desired value
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 096dbce..75a1dd8 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -13,6 +13,7 @@
 #ifndef __LINUX_POWER_SUPPLY_H__
 #define __LINUX_POWER_SUPPLY_H__
 
+#include <linux/device.h>
 #include <linux/workqueue.h>
 #include <linux/leds.h>
 #include <linux/spinlock.h>
@@ -173,22 +174,32 @@
 	const char *strval;
 };
 
-struct device;
 struct device_node;
+struct power_supply;
 
-struct power_supply {
+/* Run-time specific power supply configuration */
+struct power_supply_config {
+	struct device_node *of_node;
+	/* Driver private data */
+	void *drv_data;
+
+	char **supplied_to;
+	size_t num_supplicants;
+};
+
+/* Description of power supply */
+struct power_supply_desc {
 	const char *name;
 	enum power_supply_type type;
 	enum power_supply_property *properties;
 	size_t num_properties;
 
-	char **supplied_to;
-	size_t num_supplicants;
-
-	char **supplied_from;
-	size_t num_supplies;
-	struct device_node *of_node;
-
+	/*
+	 * Functions for drivers implementing power supply class.
+	 * These shouldn't be called directly by other drivers for accessing
+	 * this power supply. Instead use power_supply_*() functions (for
+	 * example power_supply_get_property()).
+	 */
 	int (*get_property)(struct power_supply *psy,
 			    enum power_supply_property psp,
 			    union power_supply_propval *val);
@@ -208,12 +219,27 @@
 	bool no_thermal;
 	/* For APM emulation, think legacy userspace. */
 	int use_for_apm;
+};
+
+struct power_supply {
+	const struct power_supply_desc *desc;
+
+	char **supplied_to;
+	size_t num_supplicants;
+
+	char **supplied_from;
+	size_t num_supplies;
+	struct device_node *of_node;
+
+	/* Driver private data */
+	void *drv_data;
 
 	/* private */
-	struct device *dev;
+	struct device dev;
 	struct work_struct changed_work;
 	spinlock_t changed_lock;
 	bool changed;
+	atomic_t use_cnt;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
 	struct thermal_cooling_device *tcd;
@@ -256,6 +282,7 @@
 extern int power_supply_reg_notifier(struct notifier_block *nb);
 extern void power_supply_unreg_notifier(struct notifier_block *nb);
 extern struct power_supply *power_supply_get_by_name(const char *name);
+extern void power_supply_put(struct power_supply *psy);
 #ifdef CONFIG_OF
 extern struct power_supply *power_supply_get_by_phandle(struct device_node *np,
 							const char *property);
@@ -274,13 +301,36 @@
 static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
 #endif
 
-extern int power_supply_register(struct device *parent,
-				 struct power_supply *psy);
-extern int power_supply_register_no_ws(struct device *parent,
-				 struct power_supply *psy);
+extern int power_supply_get_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    union power_supply_propval *val);
+extern int power_supply_set_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    const union power_supply_propval *val);
+extern int power_supply_property_is_writeable(struct power_supply *psy,
+					enum power_supply_property psp);
+extern void power_supply_external_power_changed(struct power_supply *psy);
+
+extern struct power_supply *__must_check
+power_supply_register(struct device *parent,
+				 const struct power_supply_desc *desc,
+				 const struct power_supply_config *cfg);
+extern struct power_supply *__must_check
+power_supply_register_no_ws(struct device *parent,
+				 const struct power_supply_desc *desc,
+				 const struct power_supply_config *cfg);
+extern struct power_supply *__must_check
+devm_power_supply_register(struct device *parent,
+				 const struct power_supply_desc *desc,
+				 const struct power_supply_config *cfg);
+extern struct power_supply *__must_check
+devm_power_supply_register_no_ws(struct device *parent,
+				 const struct power_supply_desc *desc,
+				 const struct power_supply_config *cfg);
 extern void power_supply_unregister(struct power_supply *psy);
 extern int power_supply_powers(struct power_supply *psy, struct device *dev);
 
+extern void *power_supply_get_drvdata(struct power_supply *psy);
 /* For APM emulation, think legacy userspace. */
 extern struct class *power_supply_class;
 
diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h
index b6c4909..15fa8f2 100644
--- a/include/linux/regulator/act8865.h
+++ b/include/linux/regulator/act8865.h
@@ -19,6 +19,19 @@
 #include <linux/regulator/machine.h>
 
 enum {
+	ACT8600_ID_DCDC1,
+	ACT8600_ID_DCDC2,
+	ACT8600_ID_DCDC3,
+	ACT8600_ID_SUDCDC4,
+	ACT8600_ID_LDO5,
+	ACT8600_ID_LDO6,
+	ACT8600_ID_LDO7,
+	ACT8600_ID_LDO8,
+	ACT8600_ID_LDO9,
+	ACT8600_ID_LDO10,
+};
+
+enum {
 	ACT8865_ID_DCDC1,
 	ACT8865_ID_DCDC2,
 	ACT8865_ID_DCDC3,
@@ -46,6 +59,7 @@
 };
 
 enum {
+	ACT8600,
 	ACT8865,
 	ACT8846,
 };
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index d17e1ff..f8a689e 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -114,7 +114,7 @@
 #define REGULATOR_EVENT_OVER_TEMP		0x10
 #define REGULATOR_EVENT_FORCE_DISABLE		0x20
 #define REGULATOR_EVENT_VOLTAGE_CHANGE		0x40
-#define REGULATOR_EVENT_DISABLE 		0x80
+#define REGULATOR_EVENT_DISABLE			0x80
 #define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE	0x100
 #define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE	0x200
 #define REGULATOR_EVENT_PRE_DISABLE		0x400
@@ -238,7 +238,7 @@
 
 int regulator_set_mode(struct regulator *regulator, unsigned int mode);
 unsigned int regulator_get_mode(struct regulator *regulator);
-int regulator_set_optimum_mode(struct regulator *regulator, int load_uA);
+int regulator_set_load(struct regulator *regulator, int load_uA);
 
 int regulator_allow_bypass(struct regulator *regulator, bool allow);
 
@@ -252,8 +252,12 @@
 /* regulator notifier block */
 int regulator_register_notifier(struct regulator *regulator,
 			      struct notifier_block *nb);
+int devm_regulator_register_notifier(struct regulator *regulator,
+				     struct notifier_block *nb);
 int regulator_unregister_notifier(struct regulator *regulator,
 				struct notifier_block *nb);
+void devm_regulator_unregister_notifier(struct regulator *regulator,
+					struct notifier_block *nb);
 
 /* driver data - core doesn't touch */
 void *regulator_get_drvdata(struct regulator *regulator);
@@ -479,8 +483,7 @@
 	return REGULATOR_MODE_NORMAL;
 }
 
-static inline int regulator_set_optimum_mode(struct regulator *regulator,
-					int load_uA)
+static inline int regulator_set_load(struct regulator *regulator, int load_uA)
 {
 	return REGULATOR_MODE_NORMAL;
 }
@@ -515,12 +518,24 @@
 	return 0;
 }
 
+static inline int devm_regulator_register_notifier(struct regulator *regulator,
+						   struct notifier_block *nb)
+{
+	return 0;
+}
+
 static inline int regulator_unregister_notifier(struct regulator *regulator,
 				struct notifier_block *nb)
 {
 	return 0;
 }
 
+static inline int devm_regulator_unregister_notifier(struct regulator *regulator,
+						     struct notifier_block *nb)
+{
+	return 0;
+}
+
 static inline void *regulator_get_drvdata(struct regulator *regulator)
 {
 	return NULL;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 045f709..fffa688 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -98,6 +98,7 @@
  *	REGULATOR_STATUS value (or negative errno)
  * @get_optimum_mode: Get the most efficient operating mode for the regulator
  *                    when running with the specified parameters.
+ * @set_load: Set the load for the regulator.
  *
  * @set_bypass: Set the regulator in bypass mode.
  * @get_bypass: Get the regulator bypass mode state.
@@ -167,6 +168,8 @@
 	/* get most efficient regulator operating mode for load */
 	unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV,
 					  int output_uV, int load_uA);
+	/* set the load on the regulator */
+	int (*set_load)(struct regulator_dev *, int load_uA);
 
 	/* control and report on bypass mode */
 	int (*set_bypass)(struct regulator_dev *dev, bool enable);
@@ -367,6 +370,7 @@
 	struct device dev;
 	struct regulation_constraints *constraints;
 	struct regulator *supply;	/* for tree */
+	const char *supply_name;
 	struct regmap *regmap;
 
 	struct delayed_work disable_work;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index dcad7ee..8dcf682 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -77,6 +77,7 @@
 	int (*read_alarm)(struct device *, struct rtc_wkalrm *);
 	int (*set_alarm)(struct device *, struct rtc_wkalrm *);
 	int (*proc)(struct device *, struct seq_file *);
+	int (*set_mmss64)(struct device *, time64_t secs);
 	int (*set_mmss)(struct device *, unsigned long secs);
 	int (*read_callback)(struct device *, int data);
 	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a419b65..f74d4cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -176,6 +176,14 @@
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+	struct task_struct *task;
+	int from_cpu;
+	int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
@@ -329,6 +337,8 @@
 extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
+extern cpumask_var_t cpu_isolated_map;
+
 extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
@@ -1115,15 +1125,28 @@
 };
 
 struct sched_avg {
+	u64 last_runnable_update;
+	s64 decay_count;
+	/*
+	 * utilization_avg_contrib describes the amount of time that a
+	 * sched_entity is running on a CPU. It is based on running_avg_sum
+	 * and is scaled in the range [0..SCHED_LOAD_SCALE].
+	 * load_avg_contrib described the amount of time that a sched_entity
+	 * is runnable on a rq. It is based on both runnable_avg_sum and the
+	 * weight of the task.
+	 */
+	unsigned long load_avg_contrib, utilization_avg_contrib;
 	/*
 	 * These sums represent an infinite geometric series and so are bound
 	 * above by 1024/(1-y).  Thus we only need a u32 to store them for all
 	 * choices of y < 1-2^(-32)*1024.
+	 * running_avg_sum reflects the time that the sched_entity is
+	 * effectively running on the CPU.
+	 * runnable_avg_sum represents the amount of time a sched_entity is on
+	 * a runqueue which includes the running time that is monitored by
+	 * running_avg_sum.
 	 */
-	u32 runnable_avg_sum, runnable_avg_period;
-	u64 last_runnable_update;
-	s64 decay_count;
-	unsigned long load_avg_contrib;
+	u32 runnable_avg_sum, avg_period, running_avg_sum;
 };
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f5df8f6..5f68d0a 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -108,7 +108,7 @@
 	unsigned ret;
 
 repeat:
-	ret = ACCESS_ONCE(s->sequence);
+	ret = READ_ONCE(s->sequence);
 	if (unlikely(ret & 1)) {
 		cpu_relax();
 		goto repeat;
@@ -127,7 +127,7 @@
  */
 static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
-	unsigned ret = ACCESS_ONCE(s->sequence);
+	unsigned ret = READ_ONCE(s->sequence);
 	smp_rmb();
 	return ret;
 }
@@ -179,7 +179,7 @@
  */
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
-	unsigned ret = ACCESS_ONCE(s->sequence);
+	unsigned ret = READ_ONCE(s->sequence);
 	smp_rmb();
 	return ret & ~1;
 }
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 856d34d..d673072 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -162,8 +162,6 @@
  * @remove: Unbinds this driver from the spi device
  * @shutdown: Standard shutdown callback used during system state
  *	transitions such as powerdown/halt and kexec
- * @suspend: Standard suspend callback used during system state transitions
- * @resume: Standard resume callback used during system state transitions
  * @driver: SPI device drivers should initialize the name and owner
  *	field of this structure.
  *
@@ -184,8 +182,6 @@
 	int			(*probe)(struct spi_device *spi);
 	int			(*remove)(struct spi_device *spi);
 	void			(*shutdown)(struct spi_device *spi);
-	int			(*suspend)(struct spi_device *spi, pm_message_t mesg);
-	int			(*resume)(struct spi_device *spi);
 	struct device_driver	driver;
 };
 
@@ -294,6 +290,8 @@
  *                    transfer_one_message are mutually exclusive; when both
  *                    are set, the generic subsystem does not call your
  *                    transfer_one callback.
+ * @handle_err: the subsystem calls the driver to handle an error that occurs
+ *		in the generic implementation of transfer_one_message().
  * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
@@ -448,6 +446,8 @@
 	void (*set_cs)(struct spi_device *spi, bool enable);
 	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *transfer);
+	void (*handle_err)(struct spi_master *master,
+			   struct spi_message *message);
 
 	/* gpio chip select */
 	int			*cs_gpios;
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index f4aec0e..076af43 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -19,3 +19,12 @@
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 #endif
+
+/**
+ * offsetofend(TYPE, MEMBER)
+ *
+ * @TYPE: The type of the structure
+ * @MEMBER: The member within the structure to get the end offset of
+ */
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER)	+ sizeof(((TYPE *)0)->MEMBER))
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9c085dc..f8492da5 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -1,7 +1,5 @@
-/*  linux/include/linux/tick.h
- *
- *  This file contains the structure definitions for tick related functions
- *
+/*
+ * Tick related global functions
  */
 #ifndef _LINUX_TICK_H
 #define _LINUX_TICK_H
@@ -9,149 +7,99 @@
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
 #include <linux/percpu.h>
-#include <linux/hrtimer.h>
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
-
-enum tick_device_mode {
-	TICKDEV_MODE_PERIODIC,
-	TICKDEV_MODE_ONESHOT,
-};
-
-struct tick_device {
-	struct clock_event_device *evtdev;
-	enum tick_device_mode mode;
-};
-
-enum tick_nohz_mode {
-	NOHZ_MODE_INACTIVE,
-	NOHZ_MODE_LOWRES,
-	NOHZ_MODE_HIGHRES,
-};
-
-/**
- * struct tick_sched - sched tick emulation and no idle tick control/stats
- * @sched_timer:	hrtimer to schedule the periodic tick in high
- *			resolution mode
- * @last_tick:		Store the last tick expiry time when the tick
- *			timer is modified for nohz sleeps. This is necessary
- *			to resume the tick timer operation in the timeline
- *			when the CPU returns from nohz sleep.
- * @tick_stopped:	Indicator that the idle tick has been stopped
- * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
- * @idle_calls:		Total number of idle calls
- * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
- * @idle_entrytime:	Time when the idle call was entered
- * @idle_waketime:	Time when the idle was interrupted
- * @idle_exittime:	Time when the idle state was left
- * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
- * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:	Duration of the current idle sleep
- * @do_timer_lst:	CPU was the last one doing do_timer before going idle
- */
-struct tick_sched {
-	struct hrtimer			sched_timer;
-	unsigned long			check_clocks;
-	enum tick_nohz_mode		nohz_mode;
-	ktime_t				last_tick;
-	int				inidle;
-	int				tick_stopped;
-	unsigned long			idle_jiffies;
-	unsigned long			idle_calls;
-	unsigned long			idle_sleeps;
-	int				idle_active;
-	ktime_t				idle_entrytime;
-	ktime_t				idle_waketime;
-	ktime_t				idle_exittime;
-	ktime_t				idle_sleeptime;
-	ktime_t				iowait_sleeptime;
-	ktime_t				sleep_length;
-	unsigned long			last_jiffies;
-	unsigned long			next_jiffies;
-	ktime_t				idle_expires;
-	int				do_timer_last;
-};
-
 extern void __init tick_init(void);
-extern int tick_is_oneshot_available(void);
-extern struct tick_device *tick_get_device(int cpu);
-
 extern void tick_freeze(void);
 extern void tick_unfreeze(void);
-
-# ifdef CONFIG_HIGH_RES_TIMERS
-extern int tick_init_highres(void);
-extern int tick_program_event(ktime_t expires, int force);
-extern void tick_setup_sched_timer(void);
-# endif
-
-# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
-# else
-static inline void tick_cancel_sched_timer(int cpu) { }
-# endif
-
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern struct tick_device *tick_get_broadcast_device(void);
-extern struct cpumask *tick_get_broadcast_mask(void);
-
-#  ifdef CONFIG_TICK_ONESHOT
-extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
-#  endif
-
-# endif /* BROADCAST */
-
-# ifdef CONFIG_TICK_ONESHOT
-extern void tick_clock_notify(void);
-extern int tick_check_oneshot_change(int allow_nohz);
-extern struct tick_sched *tick_get_tick_sched(int cpu);
-extern void tick_irq_enter(void);
-extern int tick_oneshot_mode_active(void);
-#  ifndef arch_needs_cpu
-#   define arch_needs_cpu() (0)
-#  endif
-# else
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
-# endif
-
+/* Should be core only, but ARM BL switcher requires it */
+extern void tick_suspend_local(void);
+/* Should be core only, but XEN resume magic and ARM BL switcher require it */
+extern void tick_resume_local(void);
+extern void tick_handover_do_timer(void);
+extern void tick_cleanup_dead_cpu(int cpu);
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
 static inline void tick_init(void) { }
 static inline void tick_freeze(void) { }
 static inline void tick_unfreeze(void) { }
-static inline void tick_cancel_sched_timer(int cpu) { }
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
+static inline void tick_suspend_local(void) { }
+static inline void tick_resume_local(void) { }
+static inline void tick_handover_do_timer(void) { }
+static inline void tick_cleanup_dead_cpu(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-# ifdef CONFIG_NO_HZ_COMMON
-DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+#ifdef CONFIG_TICK_ONESHOT
+extern void tick_irq_enter(void);
+#  ifndef arch_needs_cpu
+#   define arch_needs_cpu() (0)
+#  endif
+# else
+static inline void tick_irq_enter(void) { }
+#endif
 
-static inline int tick_nohz_tick_stopped(void)
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu);
+#else
+static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { }
+#endif
+
+enum tick_broadcast_mode {
+	TICK_BROADCAST_OFF,
+	TICK_BROADCAST_ON,
+	TICK_BROADCAST_FORCE,
+};
+
+enum tick_broadcast_state {
+	TICK_BROADCAST_EXIT,
+	TICK_BROADCAST_ENTER,
+};
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern void tick_broadcast_control(enum tick_broadcast_mode mode);
+#else
+static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
+#endif /* BROADCAST */
+
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
+#else
+static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; }
+#endif
+
+static inline void tick_broadcast_enable(void)
 {
-	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+	tick_broadcast_control(TICK_BROADCAST_ON);
+}
+static inline void tick_broadcast_disable(void)
+{
+	tick_broadcast_control(TICK_BROADCAST_OFF);
+}
+static inline void tick_broadcast_force(void)
+{
+	tick_broadcast_control(TICK_BROADCAST_FORCE);
+}
+static inline int tick_broadcast_enter(void)
+{
+	return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER);
+}
+static inline void tick_broadcast_exit(void)
+{
+	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+extern int tick_nohz_tick_stopped(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-
-# else /* !CONFIG_NO_HZ_COMMON */
-static inline int tick_nohz_tick_stopped(void)
-{
-	return 0;
-}
-
+#else /* !CONFIG_NO_HZ_COMMON */
+static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
@@ -163,7 +111,7 @@
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
-# endif /* !CONFIG_NO_HZ_COMMON */
+#endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
 extern bool tick_nohz_full_running;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 05af9a3..fb86963 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -16,16 +16,16 @@
  * @read:	Read function of @clock
  * @mask:	Bitmask for two's complement subtraction of non 64bit clocks
  * @cycle_last: @clock cycle value at last update
- * @mult:	NTP adjusted multiplier for scaled math conversion
+ * @mult:	(NTP adjusted) multiplier for scaled math conversion
  * @shift:	Shift value for scaled math conversion
  * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono:  ktime_t (nanoseconds) base time for readout
+ * @base:	ktime_t (nanoseconds) base time for readout
  *
  * This struct has size 56 byte on 64 bit. Together with a seqcount it
  * occupies a single 64byte cache line.
  *
  * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for a fast NMI safe accessors.
  */
 struct tk_read_base {
 	struct clocksource	*clock;
@@ -35,12 +35,13 @@
 	u32			mult;
 	u32			shift;
 	u64			xtime_nsec;
-	ktime_t			base_mono;
+	ktime_t			base;
 };
 
 /**
  * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr:		The readout base structure
+ * @tkr_mono:		The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
  * @xtime_sec:		Current CLOCK_REALTIME time in seconds
  * @ktime_sec:		Current CLOCK_MONOTONIC time in seconds
  * @wall_to_monotonic:	CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@
  * @offs_boot:		Offset clock monotonic -> clock boottime
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
- * @base_raw:		Monotonic raw base time in ktime_t format
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@
  * used instead.
  */
 struct timekeeper {
-	struct tk_read_base	tkr;
+	struct tk_read_base	tkr_mono;
+	struct tk_read_base	tkr_raw;
 	u64			xtime_sec;
 	unsigned long		ktime_sec;
 	struct timespec64	wall_to_monotonic;
@@ -84,7 +85,6 @@
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
 	s32			tai_offset;
-	ktime_t			base_raw;
 	struct timespec64	raw_time;
 
 	/* The following members are for timekeeping internal use */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3eaae47..99176af 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -214,12 +214,18 @@
 	return ktime_to_ns(ktime_get_boottime());
 }
 
+static inline u64 ktime_get_tai_ns(void)
+{
+	return ktime_to_ns(ktime_get_clocktai());
+}
+
 static inline u64 ktime_get_raw_ns(void)
 {
 	return ktime_to_ns(ktime_get_raw());
 }
 
 extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
@@ -242,6 +248,9 @@
 /*
  * RTC specific
  */
+extern bool timekeeping_rtc_skipsuspend(void);
+extern bool timekeeping_rtc_skipresume(void);
+
 extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
 
 /*
@@ -253,17 +262,14 @@
 /*
  * Persistent clock related interfaces
  */
-extern bool persistent_clock_exist;
 extern int persistent_clock_is_local;
 
-static inline bool has_persistent_clock(void)
-{
-	return persistent_clock_exist;
-}
-
 extern void read_persistent_clock(struct timespec *ts);
+extern void read_persistent_clock64(struct timespec64 *ts);
 extern void read_boot_clock(struct timespec *ts);
+extern void read_boot_clock64(struct timespec64 *ts);
 extern int update_persistent_clock(struct timespec now);
+extern int update_persistent_clock64(struct timespec64 now);
 
 
 #endif
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 2d67b89..049b2f4 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -78,19 +78,6 @@
 extern void vfio_unregister_iommu_driver(
 				const struct vfio_iommu_driver_ops *ops);
 
-/**
- * offsetofend(TYPE, MEMBER)
- *
- * @TYPE: The type of the structure
- * @MEMBER: The member within the structure to get the end offset of
- *
- * Simple helper macro for dealing with variable sized structures passed
- * from user space.  This allows us to easily determine if the provided
- * structure is sized to include various fields.
- */
-#define offsetofend(TYPE, MEMBER) \
-	(offsetof(TYPE, MEMBER)	+ sizeof(((TYPE *)0)->MEMBER))
-
 /*
  * External user API
  */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f597846..deee212 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -454,6 +454,7 @@
 extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_workqueue_state(void);
 
 /**
  * queue_work - queue work on a workqueue
diff --git a/include/media/atmel-isi.h b/include/media/atmel-isi.h
index c2e5703..6008b09 100644
--- a/include/media/atmel-isi.h
+++ b/include/media/atmel-isi.h
@@ -59,6 +59,10 @@
 #define		ISI_CFG1_FRATE_DIV_MASK		(7 << 8)
 #define ISI_CFG1_DISCR				(1 << 11)
 #define ISI_CFG1_FULL_MODE			(1 << 12)
+/* Definition for THMASK(ISI_V2) */
+#define		ISI_CFG1_THMASK_BEATS_4		(0 << 13)
+#define		ISI_CFG1_THMASK_BEATS_8		(1 << 13)
+#define		ISI_CFG1_THMASK_BEATS_16	(2 << 13)
 
 /* Bitfields in CFG2 */
 #define ISI_CFG2_GRAYSCALE			(1 << 13)
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index 1e1421b..5a4bb5b 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -59,6 +59,7 @@
 				   u64 * info_out);
 
 extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
+extern void scsi_set_sense_information(u8 *buf, u64 info);
 
 extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
 
diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
new file mode 100644
index 0000000..8b0fbd9
--- /dev/null
+++ b/include/trace/events/libata.h
@@ -0,0 +1,325 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM libata
+
+#if !defined(_TRACE_LIBATA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LIBATA_H
+
+#include <linux/ata.h>
+#include <linux/libata.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#define ata_opcode_name(opcode)	{ opcode, #opcode }
+#define show_opcode_name(val)					\
+	__print_symbolic(val,					\
+		 ata_opcode_name(ATA_CMD_DEV_RESET),		\
+		 ata_opcode_name(ATA_CMD_CHK_POWER),		\
+		 ata_opcode_name(ATA_CMD_STANDBY),		\
+		 ata_opcode_name(ATA_CMD_IDLE),			\
+		 ata_opcode_name(ATA_CMD_EDD),			\
+		 ata_opcode_name(ATA_CMD_DOWNLOAD_MICRO),	\
+		 ata_opcode_name(ATA_CMD_DOWNLOAD_MICRO_DMA),	\
+		 ata_opcode_name(ATA_CMD_NOP),			\
+		 ata_opcode_name(ATA_CMD_FLUSH),		\
+		 ata_opcode_name(ATA_CMD_FLUSH_EXT),		\
+		 ata_opcode_name(ATA_CMD_ID_ATA),		\
+		 ata_opcode_name(ATA_CMD_ID_ATAPI),		\
+		 ata_opcode_name(ATA_CMD_SERVICE),		\
+		 ata_opcode_name(ATA_CMD_READ),			\
+		 ata_opcode_name(ATA_CMD_READ_EXT),		\
+		 ata_opcode_name(ATA_CMD_READ_QUEUED),		\
+		 ata_opcode_name(ATA_CMD_READ_STREAM_EXT),	\
+		 ata_opcode_name(ATA_CMD_READ_STREAM_DMA_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE),		\
+		 ata_opcode_name(ATA_CMD_WRITE_EXT),		\
+		 ata_opcode_name(ATA_CMD_WRITE_QUEUED),		\
+		 ata_opcode_name(ATA_CMD_WRITE_STREAM_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE_STREAM_DMA_EXT), \
+		 ata_opcode_name(ATA_CMD_WRITE_FUA_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE_QUEUED_FUA_EXT), \
+		 ata_opcode_name(ATA_CMD_FPDMA_READ),		\
+		 ata_opcode_name(ATA_CMD_FPDMA_WRITE),		\
+		 ata_opcode_name(ATA_CMD_FPDMA_SEND),		\
+		 ata_opcode_name(ATA_CMD_FPDMA_RECV),		\
+		 ata_opcode_name(ATA_CMD_PIO_READ),		\
+		 ata_opcode_name(ATA_CMD_PIO_READ_EXT),		\
+		 ata_opcode_name(ATA_CMD_PIO_WRITE),		\
+		 ata_opcode_name(ATA_CMD_PIO_WRITE_EXT),	\
+		 ata_opcode_name(ATA_CMD_READ_MULTI),		\
+		 ata_opcode_name(ATA_CMD_READ_MULTI_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE_MULTI),		\
+		 ata_opcode_name(ATA_CMD_WRITE_MULTI_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE_MULTI_FUA_EXT),	\
+		 ata_opcode_name(ATA_CMD_SET_FEATURES),		\
+		 ata_opcode_name(ATA_CMD_SET_MULTI),		\
+		 ata_opcode_name(ATA_CMD_PACKET),		\
+		 ata_opcode_name(ATA_CMD_VERIFY),		\
+		 ata_opcode_name(ATA_CMD_VERIFY_EXT),		\
+		 ata_opcode_name(ATA_CMD_WRITE_UNCORR_EXT),	\
+		 ata_opcode_name(ATA_CMD_STANDBYNOW1),		\
+		 ata_opcode_name(ATA_CMD_IDLEIMMEDIATE),	\
+		 ata_opcode_name(ATA_CMD_SLEEP),		\
+		 ata_opcode_name(ATA_CMD_INIT_DEV_PARAMS),	\
+		 ata_opcode_name(ATA_CMD_READ_NATIVE_MAX),	\
+		 ata_opcode_name(ATA_CMD_READ_NATIVE_MAX_EXT),	\
+		 ata_opcode_name(ATA_CMD_SET_MAX),		\
+		 ata_opcode_name(ATA_CMD_SET_MAX_EXT),		\
+		 ata_opcode_name(ATA_CMD_READ_LOG_EXT),		\
+		 ata_opcode_name(ATA_CMD_WRITE_LOG_EXT),	\
+		 ata_opcode_name(ATA_CMD_READ_LOG_DMA_EXT),	\
+		 ata_opcode_name(ATA_CMD_WRITE_LOG_DMA_EXT),	\
+		 ata_opcode_name(ATA_CMD_TRUSTED_NONDATA),	\
+		 ata_opcode_name(ATA_CMD_TRUSTED_RCV),		\
+		 ata_opcode_name(ATA_CMD_TRUSTED_RCV_DMA),	\
+		 ata_opcode_name(ATA_CMD_TRUSTED_SND),		\
+		 ata_opcode_name(ATA_CMD_TRUSTED_SND_DMA),	\
+		 ata_opcode_name(ATA_CMD_PMP_READ),		\
+		 ata_opcode_name(ATA_CMD_PMP_READ_DMA),		\
+		 ata_opcode_name(ATA_CMD_PMP_WRITE),		\
+		 ata_opcode_name(ATA_CMD_PMP_WRITE_DMA),	\
+		 ata_opcode_name(ATA_CMD_CONF_OVERLAY),		\
+		 ata_opcode_name(ATA_CMD_SEC_SET_PASS),		\
+		 ata_opcode_name(ATA_CMD_SEC_UNLOCK),		\
+		 ata_opcode_name(ATA_CMD_SEC_ERASE_PREP),	\
+		 ata_opcode_name(ATA_CMD_SEC_ERASE_UNIT),	\
+		 ata_opcode_name(ATA_CMD_SEC_FREEZE_LOCK),	\
+		 ata_opcode_name(ATA_CMD_SEC_DISABLE_PASS),	\
+		 ata_opcode_name(ATA_CMD_CONFIG_STREAM),	\
+		 ata_opcode_name(ATA_CMD_SMART),		\
+		 ata_opcode_name(ATA_CMD_MEDIA_LOCK),		\
+		 ata_opcode_name(ATA_CMD_MEDIA_UNLOCK),		\
+		 ata_opcode_name(ATA_CMD_DSM),			\
+		 ata_opcode_name(ATA_CMD_CHK_MED_CRD_TYP),	\
+		 ata_opcode_name(ATA_CMD_CFA_REQ_EXT_ERR),	\
+		 ata_opcode_name(ATA_CMD_CFA_WRITE_NE),		\
+		 ata_opcode_name(ATA_CMD_CFA_TRANS_SECT),	\
+		 ata_opcode_name(ATA_CMD_CFA_ERASE),		\
+		 ata_opcode_name(ATA_CMD_CFA_WRITE_MULT_NE),	\
+		 ata_opcode_name(ATA_CMD_REQ_SENSE_DATA),	\
+		 ata_opcode_name(ATA_CMD_SANITIZE_DEVICE),	\
+		 ata_opcode_name(ATA_CMD_RESTORE),		\
+		 ata_opcode_name(ATA_CMD_READ_LONG),		\
+		 ata_opcode_name(ATA_CMD_READ_LONG_ONCE),	\
+		 ata_opcode_name(ATA_CMD_WRITE_LONG),		\
+		 ata_opcode_name(ATA_CMD_WRITE_LONG_ONCE))
+
+#define ata_error_name(result)	{ result, #result }
+#define show_error_name(val)				\
+	__print_symbolic(val,				\
+		ata_error_name(ATA_ICRC),		\
+		ata_error_name(ATA_UNC),		\
+		ata_error_name(ATA_MC),			\
+		ata_error_name(ATA_IDNF),		\
+		ata_error_name(ATA_MCR),		\
+		ata_error_name(ATA_ABORTED),		\
+		ata_error_name(ATA_TRK0NF),		\
+		ata_error_name(ATA_AMNF))
+
+#define ata_protocol_name(proto)	{ proto, #proto }
+#define show_protocol_name(val)				\
+	__print_symbolic(val,				\
+		ata_protocol_name(ATA_PROT_UNKNOWN),	\
+		ata_protocol_name(ATA_PROT_NODATA),	\
+		ata_protocol_name(ATA_PROT_PIO),	\
+		ata_protocol_name(ATA_PROT_DMA),	\
+		ata_protocol_name(ATA_PROT_NCQ),	\
+		ata_protocol_name(ATAPI_PROT_NODATA),	\
+		ata_protocol_name(ATAPI_PROT_PIO),	\
+		ata_protocol_name(ATAPI_PROT_DMA))
+
+const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
+#define __parse_status(s) libata_trace_parse_status(p, s)
+
+const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int);
+#define __parse_eh_action(a) libata_trace_parse_eh_action(p, a)
+
+const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int);
+#define __parse_eh_err_mask(m) libata_trace_parse_eh_err_mask(p, m)
+
+const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int);
+#define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f)
+
+TRACE_EVENT(ata_qc_issue,
+
+	TP_PROTO(struct ata_queued_cmd *qc),
+
+	TP_ARGS(qc),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	tag	)
+		__field( unsigned char,	cmd	)
+		__field( unsigned char,	dev	)
+		__field( unsigned char,	lbal	)
+		__field( unsigned char,	lbam	)
+		__field( unsigned char,	lbah	)
+		__field( unsigned char,	nsect	)
+		__field( unsigned char,	feature	)
+		__field( unsigned char,	hob_lbal )
+		__field( unsigned char,	hob_lbam )
+		__field( unsigned char,	hob_lbah )
+		__field( unsigned char,	hob_nsect )
+		__field( unsigned char,	hob_feature )
+		__field( unsigned char,	ctl )
+		__field( unsigned char,	proto )
+		__field( unsigned long,	flags )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= qc->ap->print_id;
+		__entry->ata_dev	= qc->dev->link->pmp + qc->dev->devno;
+		__entry->tag		= qc->tag;
+		__entry->proto		= qc->tf.protocol;
+		__entry->cmd		= qc->tf.command;
+		__entry->dev		= qc->tf.device;
+		__entry->lbal		= qc->tf.lbal;
+		__entry->lbam		= qc->tf.lbam;
+		__entry->lbah		= qc->tf.lbah;
+		__entry->hob_lbal	= qc->tf.hob_lbal;
+		__entry->hob_lbam	= qc->tf.hob_lbam;
+		__entry->hob_lbah	= qc->tf.hob_lbah;
+		__entry->feature	= qc->tf.feature;
+		__entry->hob_feature	= qc->tf.hob_feature;
+		__entry->nsect		= qc->tf.nsect;
+		__entry->hob_nsect	= qc->tf.hob_nsect;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s cmd=%s " \
+		  " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)",
+		  __entry->ata_port, __entry->ata_dev, __entry->tag,
+		  show_protocol_name(__entry->proto),
+		  show_opcode_name(__entry->cmd),
+		  __entry->cmd, __entry->feature, __entry->nsect,
+		  __entry->lbal, __entry->lbam, __entry->lbah,
+		  __entry->hob_feature, __entry->hob_nsect,
+		  __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah,
+		  __entry->dev)
+);
+
+DECLARE_EVENT_CLASS(ata_qc_complete_template,
+
+	TP_PROTO(struct ata_queued_cmd *qc),
+
+	TP_ARGS(qc),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	tag	)
+		__field( unsigned char,	status	)
+		__field( unsigned char,	dev	)
+		__field( unsigned char,	lbal	)
+		__field( unsigned char,	lbam	)
+		__field( unsigned char,	lbah	)
+		__field( unsigned char,	nsect	)
+		__field( unsigned char,	error	)
+		__field( unsigned char,	hob_lbal )
+		__field( unsigned char,	hob_lbam )
+		__field( unsigned char,	hob_lbah )
+		__field( unsigned char,	hob_nsect )
+		__field( unsigned char,	hob_feature )
+		__field( unsigned char,	ctl )
+		__field( unsigned long,	flags )
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= qc->ap->print_id;
+		__entry->ata_dev	= qc->dev->link->pmp + qc->dev->devno;
+		__entry->tag		= qc->tag;
+		__entry->status		= qc->result_tf.command;
+		__entry->dev		= qc->result_tf.device;
+		__entry->lbal		= qc->result_tf.lbal;
+		__entry->lbam		= qc->result_tf.lbam;
+		__entry->lbah		= qc->result_tf.lbah;
+		__entry->hob_lbal	= qc->result_tf.hob_lbal;
+		__entry->hob_lbam	= qc->result_tf.hob_lbam;
+		__entry->hob_lbah	= qc->result_tf.hob_lbah;
+		__entry->error		= qc->result_tf.feature;
+		__entry->hob_feature	= qc->result_tf.hob_feature;
+		__entry->nsect		= qc->result_tf.nsect;
+		__entry->hob_nsect	= qc->result_tf.hob_nsect;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \
+		  " res=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)",
+		  __entry->ata_port, __entry->ata_dev, __entry->tag,
+		  __parse_qc_flags(__entry->flags),
+		  __parse_status(__entry->status),
+		  __entry->status, __entry->error, __entry->nsect,
+		  __entry->lbal, __entry->lbam, __entry->lbah,
+		  __entry->hob_feature, __entry->hob_nsect,
+		  __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah,
+		  __entry->dev)
+);
+
+DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_internal,
+	     TP_PROTO(struct ata_queued_cmd *qc),
+	     TP_ARGS(qc));
+
+DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_failed,
+	     TP_PROTO(struct ata_queued_cmd *qc),
+	     TP_ARGS(qc));
+
+DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done,
+	     TP_PROTO(struct ata_queued_cmd *qc),
+	     TP_ARGS(qc));
+
+TRACE_EVENT(ata_eh_link_autopsy,
+
+	TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask),
+
+	TP_ARGS(dev, eh_action, eh_err_mask),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	eh_action )
+		__field( unsigned int,	eh_err_mask)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= dev->link->ap->print_id;
+		__entry->ata_dev	= dev->link->pmp + dev->devno;
+		__entry->eh_action	= eh_action;
+		__entry->eh_err_mask	= eh_err_mask;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u eh_action=%s err_mask=%s",
+		  __entry->ata_port, __entry->ata_dev,
+		  __parse_eh_action(__entry->eh_action),
+		  __parse_eh_err_mask(__entry->eh_err_mask))
+);
+
+TRACE_EVENT(ata_eh_link_autopsy_qc,
+
+	TP_PROTO(struct ata_queued_cmd *qc),
+
+	TP_ARGS(qc),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	ata_port )
+		__field( unsigned int,	ata_dev	)
+		__field( unsigned int,	tag	)
+		__field( unsigned int,	qc_flags )
+		__field( unsigned int,	eh_err_mask)
+	),
+
+	TP_fast_assign(
+		__entry->ata_port	= qc->ap->print_id;
+		__entry->ata_dev	= qc->dev->link->pmp + qc->dev->devno;
+		__entry->tag		= qc->tag;
+		__entry->qc_flags	= qc->flags;
+		__entry->eh_err_mask	= qc->err_mask;
+	),
+
+	TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s err_mask=%s",
+		  __entry->ata_port, __entry->ata_dev, __entry->tag,
+		  __parse_qc_flags(__entry->qc_flags),
+		  __parse_eh_err_mask(__entry->eh_err_mask))
+);
+
+#endif /*  _TRACE_LIBATA_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
index 30ab3cd..a16a005 100644
--- a/include/uapi/linux/hsi/Kbuild
+++ b/include/uapi/linux/hsi/Kbuild
@@ -1,2 +1,2 @@
 # UAPI Header export list
-header-y += hsi_char.h
+header-y += hsi_char.h cs-protocol.h
diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h
new file mode 100644
index 0000000..4957bba
--- /dev/null
+++ b/include/uapi/linux/hsi/cs-protocol.h
@@ -0,0 +1,113 @@
+/*
+ * cmt-speech interface definitions
+ *
+ * Copyright (C) 2008,2009,2010 Nokia Corporation. All rights reserved.
+ *
+ * Contact: Kai Vehmanen <kai.vehmanen@nokia.com>
+ * Original author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _CS_PROTOCOL_H
+#define _CS_PROTOCOL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* chardev parameters */
+#define CS_DEV_FILE_NAME		"/dev/cmt_speech"
+
+/* user-space API versioning */
+#define CS_IF_VERSION			2
+
+/* APE kernel <-> user space messages */
+#define CS_CMD_SHIFT			28
+#define CS_DOMAIN_SHIFT			24
+
+#define CS_CMD_MASK			0xff000000
+#define CS_PARAM_MASK			0xffffff
+
+#define CS_CMD(id, dom) \
+	(((id) << CS_CMD_SHIFT) | ((dom) << CS_DOMAIN_SHIFT))
+
+#define CS_ERROR			CS_CMD(1, 0)
+#define CS_RX_DATA_RECEIVED		CS_CMD(2, 0)
+#define CS_TX_DATA_READY		CS_CMD(3, 0)
+#define CS_TX_DATA_SENT			CS_CMD(4, 0)
+
+/* params to CS_ERROR indication */
+#define CS_ERR_PEER_RESET		0
+
+/* ioctl interface */
+
+/* parameters to CS_CONFIG_BUFS ioctl */
+#define CS_FEAT_TSTAMP_RX_CTRL		(1 << 0)
+#define CS_FEAT_ROLLING_RX_COUNTER	(2 << 0)
+
+/* parameters to CS_GET_STATE ioctl */
+#define CS_STATE_CLOSED			0
+#define CS_STATE_OPENED			1 /* resource allocated */
+#define CS_STATE_CONFIGURED		2 /* data path active */
+
+/* maximum number of TX/RX buffers */
+#define CS_MAX_BUFFERS_SHIFT		4
+#define CS_MAX_BUFFERS			(1 << CS_MAX_BUFFERS_SHIFT)
+
+/* Parameters for setting up the data buffers */
+struct cs_buffer_config {
+	__u32 rx_bufs;	/* number of RX buffer slots */
+	__u32 tx_bufs;	/* number of TX buffer slots */
+	__u32 buf_size;	/* bytes */
+	__u32 flags;	/* see CS_FEAT_* */
+	__u32 reserved[4];
+};
+
+/*
+ * Struct describing the layout and contents of the driver mmap area.
+ * This information is meant as read-only information for the application.
+ */
+struct cs_mmap_config_block {
+	__u32 reserved1;
+	__u32 buf_size;		/* 0=disabled, otherwise the transfer size */
+	__u32 rx_bufs;		/* # of RX buffers */
+	__u32 tx_bufs;		/* # of TX buffers */
+	__u32 reserved2;
+	/* array of offsets within the mmap area for each RX and TX buffer */
+	__u32 rx_offsets[CS_MAX_BUFFERS];
+	__u32 tx_offsets[CS_MAX_BUFFERS];
+	__u32 rx_ptr;
+	__u32 rx_ptr_boundary;
+	__u32 reserved3[2];
+	/*
+	 * if enabled with CS_FEAT_TSTAMP_RX_CTRL, monotonic
+	 * timestamp taken when the last control command was received
+	 */
+	struct timespec tstamp_rx_ctrl;
+};
+
+#define CS_IO_MAGIC		'C'
+
+#define CS_IOW(num, dtype)	_IOW(CS_IO_MAGIC, num, dtype)
+#define CS_IOR(num, dtype)	_IOR(CS_IO_MAGIC, num, dtype)
+#define CS_IOWR(num, dtype)	_IOWR(CS_IO_MAGIC, num, dtype)
+#define CS_IO(num)		_IO(CS_IO_MAGIC, num)
+
+#define CS_GET_STATE		CS_IOR(21, unsigned int)
+#define CS_SET_WAKELINE		CS_IOW(23, unsigned int)
+#define CS_GET_IF_VERSION	CS_IOR(30, unsigned int)
+#define CS_CONFIG_BUFS		CS_IOW(31, struct cs_buffer_config)
+
+#endif /* _CS_PROTOCOL_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8055706..f574d7b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -147,6 +147,16 @@
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+#define KVM_S390_GET_SKEYS_NONE   1
+#define KVM_S390_SKEYS_MAX        1048576
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -172,6 +182,7 @@
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
+#define KVM_EXIT_S390_STSI        25
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -309,6 +320,15 @@
 			__u32 type;
 			__u64 flags;
 		} system_event;
+		/* KVM_EXIT_S390_STSI */
+		struct {
+			__u64 addr;
+			__u8 ar;
+			__u8 reserved;
+			__u8 fc;
+			__u8 sel1;
+			__u16 sel2;
+		} s390_stsi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -324,7 +344,7 @@
 	__u64 kvm_dirty_regs;
 	union {
 		struct kvm_sync_regs regs;
-		char padding[1024];
+		char padding[2048];
 	} s;
 };
 
@@ -365,6 +385,24 @@
 	__u8  pad[5];
 };
 
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+	/* in */
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	__u8 ar;		/* the access register number */
+	__u8 reserved[31];	/* should be set to 0 */
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ	0
+#define KVM_S390_MEMOP_LOGICAL_WRITE	1
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
+
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
 	/* in */
@@ -520,6 +558,13 @@
 	} u;
 };
 
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;
+	__u32 len;
+	__u32 reserved[4];
+};
+
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE		0x00000001
@@ -760,6 +805,14 @@
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_S390_USER_SIGP 106
+#define KVM_CAP_S390_VECTOR_REGISTERS 107
+#define KVM_CAP_S390_MEM_OP 108
+#define KVM_CAP_S390_USER_STSI 109
+#define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_MIPS_FPU 111
+#define KVM_CAP_MIPS_MSA 112
+#define KVM_CAP_S390_INJECT_IRQ 113
+#define KVM_CAP_S390_IRQ_STATE 114
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1135,6 +1188,16 @@
 #define KVM_ARM_VCPU_INIT	  _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
 #define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST	  _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
+/* Available with KVM_CAP_S390_MEM_OP */
+#define KVM_S390_MEM_OP		  _IOW(KVMIO,  0xb1, struct kvm_s390_mem_op)
+/* Available with KVM_CAP_S390_SKEYS */
+#define KVM_S390_GET_SKEYS      _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
+#define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
+/* Available with KVM_CAP_S390_INJECT_IRQ */
+#define KVM_S390_IRQ              _IOW(KVMIO,  0xb4, struct kvm_s390_irq)
+/* Available with KVM_CAP_S390_IRQ_STATE */
+#define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
+#define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
diff --git a/init/main.c b/init/main.c
index 6f0f1c5f..4a6974e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -654,8 +654,8 @@
 	page_writeback_init();
 	proc_root_init();
 	nsfs_init();
-	cgroup_init();
 	cpuset_init();
+	cgroup_init();
 	taskstats_init_early();
 	delayacct_init();
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 29a7b2c..a220fdb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3806,10 +3806,7 @@
 
 static void pidlist_free(void *p)
 {
-	if (is_vmalloc_addr(p))
-		vfree(p);
-	else
-		kfree(p);
+	kvfree(p);
 }
 
 /*
@@ -5040,6 +5037,9 @@
 			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
 			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
 		}
+
+		if (ss->bind)
+			ss->bind(init_css_set.subsys[ssid]);
 	}
 
 	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1972b16..82eea9c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 
 #include "smpboot.h"
@@ -338,6 +339,8 @@
 		return err;
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
+	/* Give up timekeeping duties */
+	tick_handover_do_timer();
 	/* Park the stopper thread */
 	kthread_park(current);
 	return 0;
@@ -411,10 +414,12 @@
 	while (!idle_cpu(cpu))
 		cpu_relax();
 
+	hotplug_cpu__broadcast_tick_pull(cpu);
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
 
 	/* CPU is completely dead: tell everyone.  Too late to complain. */
+	tick_cleanup_dead_cpu(cpu);
 	cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 
 	check_for_tasks(cpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fc7f474..c68f072 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -622,6 +622,7 @@
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
 	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -631,6 +632,10 @@
 	dattr = NULL;
 	csa = NULL;
 
+	if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
+		goto done;
+	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
 		ndoms = 1;
@@ -643,7 +648,8 @@
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms[0], top_cpuset.effective_cpus);
+		cpumask_and(doms[0], top_cpuset.effective_cpus,
+				     non_isolated_cpus);
 
 		goto done;
 	}
@@ -666,7 +672,8 @@
 		 * the corresponding sched domain.
 		 */
 		if (!cpumask_empty(cp->cpus_allowed) &&
-		    !is_sched_load_balance(cp))
+		    !(is_sched_load_balance(cp) &&
+		      cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
 			continue;
 
 		if (is_sched_load_balance(cp))
@@ -748,6 +755,7 @@
 
 			if (apn == b->pn) {
 				cpumask_or(dp, dp, b->effective_cpus);
+				cpumask_and(dp, dp, non_isolated_cpus);
 				if (dattr)
 					update_domain_attr_tree(dattr + nslot, b);
 
@@ -760,6 +768,7 @@
 	BUG_ON(nslot != ndoms);
 
 done:
+	free_cpumask_var(non_isolated_cpus);
 	kfree(csa);
 
 	/*
diff --git a/kernel/futex.c b/kernel/futex.c
index 2a5e383..2579e40 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -900,7 +900,7 @@
 	if (!p)
 		return -ESRCH;
 
-	if (!p->mm) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		put_task_struct(p);
 		return -EPERM;
 	}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6f1c7a5..eb9a4ea 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -948,6 +948,22 @@
 
 	return -ENOSYS;
 }
+
+/**
+ * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt
+ * @data:	Pointer to interrupt specific data
+ * @on:		Whether to set or reset the wake-up capability of this irq
+ *
+ * Conditional, as the underlying parent chip might not implement it.
+ */
+int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
+{
+	data = data->parent_data;
+	if (data->chip->irq_set_wake)
+		return data->chip->irq_set_wake(data, on);
+
+	return -ENOSYS;
+}
 #endif
 
 /**
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 886d09e..e68932b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -68,14 +68,20 @@
  *	Do not use this for shutdown scenarios where you must be sure
  *	that all parts (hardirq and threaded handler) have completed.
  *
+ *	Returns: false if a threaded handler is active.
+ *
  *	This function may be called - with care - from IRQ context.
  */
-void synchronize_hardirq(unsigned int irq)
+bool synchronize_hardirq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (desc)
+	if (desc) {
 		__synchronize_hardirq(desc);
+		return !atomic_read(&desc->threads_active);
+	}
+
+	return true;
 }
 EXPORT_SYMBOL(synchronize_hardirq);
 
@@ -440,6 +446,32 @@
 }
 EXPORT_SYMBOL(disable_irq);
 
+/**
+ *	disable_hardirq - disables an irq and waits for hardirq completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Enables and Disables are
+ *	nested.
+ *	This function waits for any pending hard IRQ handlers for this
+ *	interrupt to complete before returning. If you use this function while
+ *	holding a resource the hard IRQ handler may need you will deadlock.
+ *
+ *	When used to optimistically disable an interrupt from atomic context
+ *	the return value must be checked.
+ *
+ *	Returns: false if a threaded handler is active.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+bool disable_hardirq(unsigned int irq)
+{
+	if (!__disable_irq_nosync(irq))
+		return synchronize_hardirq(irq);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(disable_hardirq);
+
 void __enable_irq(struct irq_desc *desc, unsigned int irq)
 {
 	switch (desc->depth) {
@@ -1766,3 +1798,94 @@
 
 	return retval;
 }
+
+/**
+ *	irq_get_irqchip_state - returns the irqchip state of a interrupt.
+ *	@irq: Interrupt line that is forwarded to a VM
+ *	@which: One of IRQCHIP_STATE_* the caller wants to know about
+ *	@state: a pointer to a boolean where the state is to be storeed
+ *
+ *	This call snapshots the internal irqchip state of an
+ *	interrupt, returning into @state the bit corresponding to
+ *	stage @which
+ *
+ *	This function should be called with preemption disabled if the
+ *	interrupt controller has per-cpu registers.
+ */
+int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+			  bool *state)
+{
+	struct irq_desc *desc;
+	struct irq_data *data;
+	struct irq_chip *chip;
+	unsigned long flags;
+	int err = -EINVAL;
+
+	desc = irq_get_desc_buslock(irq, &flags, 0);
+	if (!desc)
+		return err;
+
+	data = irq_desc_get_irq_data(desc);
+
+	do {
+		chip = irq_data_get_irq_chip(data);
+		if (chip->irq_get_irqchip_state)
+			break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+		data = data->parent_data;
+#else
+		data = NULL;
+#endif
+	} while (data);
+
+	if (data)
+		err = chip->irq_get_irqchip_state(data, which, state);
+
+	irq_put_desc_busunlock(desc, flags);
+	return err;
+}
+
+/**
+ *	irq_set_irqchip_state - set the state of a forwarded interrupt.
+ *	@irq: Interrupt line that is forwarded to a VM
+ *	@which: State to be restored (one of IRQCHIP_STATE_*)
+ *	@val: Value corresponding to @which
+ *
+ *	This call sets the internal irqchip state of an interrupt,
+ *	depending on the value of @which.
+ *
+ *	This function should be called with preemption disabled if the
+ *	interrupt controller has per-cpu registers.
+ */
+int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+			  bool val)
+{
+	struct irq_desc *desc;
+	struct irq_data *data;
+	struct irq_chip *chip;
+	unsigned long flags;
+	int err = -EINVAL;
+
+	desc = irq_get_desc_buslock(irq, &flags, 0);
+	if (!desc)
+		return err;
+
+	data = irq_desc_get_irq_data(desc);
+
+	do {
+		chip = irq_data_get_irq_chip(data);
+		if (chip->irq_set_irqchip_state)
+			break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+		data = data->parent_data;
+#else
+		data = NULL;
+#endif
+	} while (data);
+
+	if (data)
+		err = chip->irq_set_irqchip_state(data, which, val);
+
+	irq_put_desc_busunlock(desc, flags);
+	return err;
+}
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 3e18163..474de5c 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -310,8 +310,15 @@
 	struct msi_desc *desc;
 
 	for_each_msi_entry(desc, dev) {
-		irq_domain_free_irqs(desc->irq, desc->nvec_used);
-		desc->irq = 0;
+		/*
+		 * We might have failed to allocate an MSI early
+		 * enough that there is no IRQ associated to this
+		 * entry. If that's the case, don't do anything.
+		 */
+		if (desc->irq) {
+			irq_domain_free_irqs(desc->irq, desc->nvec_used);
+			desc->irq = 0;
+		}
 	}
 }
 
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index d1fe2ba..75e114b 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -78,7 +78,7 @@
 		 */
 		return;
 	}
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/* Wait until the lock holder passes the lock down. */
 	arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@
 static inline
 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
-	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+	struct mcs_spinlock *next = READ_ONCE(node->next);
 
 	if (likely(!next)) {
 		/*
@@ -100,7 +100,7 @@
 		if (likely(cmpxchg(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
+		while (!(next = READ_ONCE(node->next)))
 			cpu_relax_lowlatency();
 	}
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 94674e5..4cccea6 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
 /*
  * Look out! "owner" is an entirely speculative pointer
  * access and not reliable.
  */
 static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+	bool ret = true;
+
 	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
+	while (lock->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking lock->owner still matches owner. If that fails,
+		 * owner might point to freed memory. If it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		if (!owner->on_cpu || need_resched()) {
+			ret = false;
 			break;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
+	return ret;
 }
 
 /*
@@ -269,7 +260,7 @@
 		return 0;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(lock->owner);
+	owner = READ_ONCE(lock->owner);
 	if (owner)
 		retval = owner->on_cpu;
 	rcu_read_unlock();
@@ -343,7 +334,7 @@
 			 * As such, when deadlock detection needs to be
 			 * performed the optimistic spinning cannot be done.
 			 */
-			if (ACCESS_ONCE(ww->ctx))
+			if (READ_ONCE(ww->ctx))
 				break;
 		}
 
@@ -351,7 +342,7 @@
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = ACCESS_ONCE(lock->owner);
+		owner = READ_ONCE(lock->owner);
 		if (owner && !mutex_spin_on_owner(lock, owner))
 			break;
 
@@ -490,7 +481,7 @@
 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-	struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 
 	if (!hold_ctx)
 		return 0;
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index c112d00..dc85ee2 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -98,7 +98,7 @@
 
 	prev = decode_cpu(old);
 	node->prev = prev;
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/*
 	 * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@
 	 * cmpxchg in an attempt to undo our queueing.
 	 */
 
-	while (!ACCESS_ONCE(node->locked)) {
+	while (!READ_ONCE(node->locked)) {
 		/*
 		 * If we need to reschedule bail... so we can block.
 		 */
@@ -148,7 +148,7 @@
 		 * Or we race against a concurrent unqueue()'s step-B, in which
 		 * case its step-C will write us a new @node->prev pointer.
 		 */
-		prev = ACCESS_ONCE(node->prev);
+		prev = READ_ONCE(node->prev);
 	}
 
 	/*
@@ -170,8 +170,8 @@
 	 * it will wait in Step-A.
 	 */
 
-	ACCESS_ONCE(next->prev) = prev;
-	ACCESS_ONCE(prev->next) = next;
+	WRITE_ONCE(next->prev, prev);
+	WRITE_ONCE(prev->next, next);
 
 	return false;
 }
@@ -193,11 +193,11 @@
 	node = this_cpu_ptr(&osq_node);
 	next = xchg(&node->next, NULL);
 	if (next) {
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 		return;
 	}
 
 	next = osq_wait_next(lock, node, NULL);
 	if (next)
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 }
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6357265..b732793 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -349,7 +349,7 @@
  *
  * @task:	the task owning the mutex (owner) for which a chain walk is
  *		probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk:	do we have to carry out deadlock detection?
  * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
  *		things for a task that has just got its priority adjusted, and
  *		is waiting on a mutex)
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2555ae1..3a50485 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -85,6 +85,13 @@
 
 		list_del(&waiter->list);
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2f7cc40..3417d01 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -14,8 +14,9 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
+#include <linux/osq_lock.h>
 
-#include "mcs_spinlock.h"
+#include "rwsem.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@
 		waiter = list_entry(next, struct rwsem_waiter, list);
 		next = waiter->list.next;
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
@@ -258,6 +266,7 @@
 		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
 		if (!list_is_singular(&sem->wait_list))
 			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+		rwsem_set_owner(sem);
 		return true;
 	}
 
@@ -270,15 +279,17 @@
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-	long old, count = ACCESS_ONCE(sem->count);
+	long old, count = READ_ONCE(sem->count);
 
 	while (true) {
 		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
 			return false;
 
 		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
-		if (old == count)
+		if (old == count) {
+			rwsem_set_owner(sem);
 			return true;
+		}
 
 		count = old;
 	}
@@ -287,60 +298,67 @@
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
 	struct task_struct *owner;
-	bool on_cpu = false;
+	bool ret = true;
 
 	if (need_resched())
 		return false;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(sem->owner);
-	if (owner)
-		on_cpu = owner->on_cpu;
+	owner = READ_ONCE(sem->owner);
+	if (!owner) {
+		long count = READ_ONCE(sem->count);
+		/*
+		 * If sem->owner is not set, yet we have just recently entered the
+		 * slowpath with the lock being active, then there is a possibility
+		 * reader(s) may have the lock. To be safe, bail spinning in these
+		 * situations.
+		 */
+		if (count & RWSEM_ACTIVE_MASK)
+			ret = false;
+		goto done;
+	}
+
+	ret = owner->on_cpu;
+done:
 	rcu_read_unlock();
-
-	/*
-	 * If sem->owner is not set, yet we have just recently entered the
-	 * slowpath, then there is a possibility reader(s) may have the lock.
-	 * To be safe, avoid spinning in these situations.
-	 */
-	return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
-				 struct task_struct *owner)
-{
-	if (sem->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * sem->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
+	return ret;
 }
 
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
+	long count;
+
 	rcu_read_lock();
-	while (owner_running(sem, owner)) {
-		if (need_resched())
-			break;
+	while (sem->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking sem->owner still matches owner, if that fails,
+		 * owner might point to free()d memory, if it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		/* abort spinning when need_resched or owner is not running */
+		if (!owner->on_cpu || need_resched()) {
+			rcu_read_unlock();
+			return false;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
+	if (READ_ONCE(sem->owner))
+		return true; /* new owner, continue spinning */
+
 	/*
-	 * We break out the loop above on need_resched() or when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when sem->owner is NULL.
+	 * When the owner is not set, the lock could be free or
+	 * held by readers. Check the counter to verify the
+	 * state.
 	 */
-	return sem->owner == NULL;
+	count = READ_ONCE(sem->count);
+	return (count == 0 || count == RWSEM_WAITING_BIAS);
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@
 		goto done;
 
 	while (true) {
-		owner = ACCESS_ONCE(sem->owner);
+		owner = READ_ONCE(sem->owner);
 		if (owner && !rwsem_spin_on_owner(sem, owner))
 			break;
 
@@ -432,7 +450,7 @@
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
-		count = ACCESS_ONCE(sem->count);
+		count = READ_ONCE(sem->count);
 
 		/*
 		 * If there were already threads queued before us and there are
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e2d3bc7..205be0c 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -9,29 +9,9 @@
 #include <linux/sched.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
-
 #include <linux/atomic.h>
 
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-	sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-	sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"
 
 /*
  * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644
index 0000000..870ed9a
--- /dev/null
+++ b/kernel/locking/rwsem.h
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
diff --git a/kernel/module.c b/kernel/module.c
index 99fdf94..ec53f59 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2479,6 +2479,23 @@
 	return 0;
 }
 
+#define COPY_CHUNK_SIZE (16*PAGE_SIZE)
+
+static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len)
+{
+	do {
+		unsigned long n = min(len, COPY_CHUNK_SIZE);
+
+		if (copy_from_user(dst, usrc, n) != 0)
+			return -EFAULT;
+		cond_resched();
+		dst += n;
+		usrc += n;
+		len -= n;
+	} while (len);
+	return 0;
+}
+
 /* Sets info->hdr and info->len. */
 static int copy_module_from_user(const void __user *umod, unsigned long len,
 				  struct load_info *info)
@@ -2498,7 +2515,7 @@
 	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+	if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) {
 		vfree(info->hdr);
 		return -EFAULT;
 	}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c24d5a2..5235dd4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -955,25 +955,6 @@
 	}
 }
 
-static bool is_nosave_page(unsigned long pfn)
-{
-	struct nosave_region *region;
-
-	list_for_each_entry(region, &nosave_regions, list) {
-		if (pfn >= region->start_pfn && pfn < region->end_pfn) {
-			pr_err("PM: %#010llx in e820 nosave region: "
-			       "[mem %#010llx-%#010llx]\n",
-			       (unsigned long long) pfn << PAGE_SHIFT,
-			       (unsigned long long) region->start_pfn << PAGE_SHIFT,
-			       ((unsigned long long) region->end_pfn << PAGE_SHIFT)
-					- 1);
-			return true;
-		}
-	}
-
-	return false;
-}
-
 /**
  *	create_basic_memory_bitmaps - create bitmaps needed for marking page
  *	frames that should not be saved and free page frames.  The pointers
@@ -2042,7 +2023,7 @@
 	do {
 		pfn = memory_bm_next_pfn(bm);
 		if (likely(pfn != BM_END_OF_MAP)) {
-			if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn))
+			if (likely(pfn_valid(pfn)))
 				swsusp_set_page_free(pfn_to_page(pfn));
 			else
 				return -EFAULT;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62671f5..2f7937e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,6 +306,9 @@
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/* cpus with isolated domains */
+cpumask_var_t cpu_isolated_map;
+
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
@@ -690,6 +693,23 @@
 bool sched_can_stop_tick(void)
 {
 	/*
+	 * FIFO realtime policy runs the highest priority task. Other runnable
+	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 */
+	if (current->policy == SCHED_FIFO)
+		return true;
+
+	/*
+	 * Round-robin realtime tasks time slice with other tasks at the same
+	 * realtime priority. Is this task the only one at this priority?
+	 */
+	if (current->policy == SCHED_RR) {
+		struct sched_rt_entity *rt_se = &current->rt;
+
+		return rt_se->run_list.prev == rt_se->run_list.next;
+	}
+
+	/*
 	 * More than one running task need preemption.
 	 * nr_running update is assumed to be visible
 	 * after IPI is sent from wakers.
@@ -996,6 +1016,13 @@
 		rq_clock_skip_update(rq, true);
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1026,10 +1053,18 @@
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		struct task_migration_notifier tmn;
+
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+		tmn.task = p;
+		tmn.from_cpu = task_cpu(p);
+		tmn.to_cpu = new_cpu;
+
+		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
@@ -5320,36 +5355,13 @@
 static int sched_cpu_inactive(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
-	unsigned long flags;
-	long cpu = (long)hcpu;
-	struct dl_bw *dl_b;
-
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
-		set_cpu_active(cpu, false);
-
-		/* explicitly allow suspend */
-		if (!(action & CPU_TASKS_FROZEN)) {
-			bool overflow;
-			int cpus;
-
-			rcu_read_lock_sched();
-			dl_b = dl_bw_of(cpu);
-
-			raw_spin_lock_irqsave(&dl_b->lock, flags);
-			cpus = dl_bw_cpus(cpu);
-			overflow = __dl_overflow(dl_b, cpus, 0, 0);
-			raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-			rcu_read_unlock_sched();
-
-			if (overflow)
-				return notifier_from_errno(-EBUSY);
-		}
+		set_cpu_active((long)hcpu, false);
 		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
 	}
-
-	return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5430,17 +5442,6 @@
 			break;
 		}
 
-		/*
-		 * Even though we initialize ->capacity to something semi-sane,
-		 * we leave capacity_orig unset. This allows us to detect if
-		 * domain iteration is still funny without causing /0 traps.
-		 */
-		if (!group->sgc->capacity_orig) {
-			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-			break;
-		}
-
 		if (!cpumask_weight(sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
@@ -5813,9 +5814,6 @@
 	update_top_cache_domain(cpu);
 }
 
-/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
-
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
@@ -5924,7 +5922,6 @@
 		 * die on a /0 trap.
 		 */
 		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -6235,6 +6232,7 @@
 	 */
 
 	if (sd->flags & SD_SHARE_CPUCAPACITY) {
+		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
@@ -7000,7 +6998,6 @@
 		 */
 
 	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
 		cpuset_update_active_cpus(true);
 		break;
 	default:
@@ -7012,8 +7009,30 @@
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 			       void *hcpu)
 {
-	switch (action) {
+	unsigned long flags;
+	long cpu = (long)hcpu;
+	struct dl_bw *dl_b;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
+		/* explicitly allow suspend */
+		if (!(action & CPU_TASKS_FROZEN)) {
+			bool overflow;
+			int cpus;
+
+			rcu_read_lock_sched();
+			dl_b = dl_bw_of(cpu);
+
+			raw_spin_lock_irqsave(&dl_b->lock, flags);
+			cpus = dl_bw_cpus(cpu);
+			overflow = __dl_overflow(dl_b, cpus, 0, 0);
+			raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+			rcu_read_unlock_sched();
+
+			if (overflow)
+				return notifier_from_errno(-EBUSY);
+		}
 		cpuset_update_active_cpus(false);
 		break;
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -7158,8 +7177,8 @@
 		rq->calc_load_active = 0;
 		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs);
-		init_rt_rq(&rq->rt, rq);
-		init_dl_rq(&rq->dl, rq);
+		init_rt_rq(&rq->rt);
+		init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7199,7 +7218,7 @@
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
@@ -7798,7 +7817,7 @@
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
 	u64 runtime = global_rt_runtime();
 	u64 period = global_rt_period();
@@ -7899,11 +7918,11 @@
 		if (ret)
 			goto undo;
 
-		ret = sched_rt_global_constraints();
+		ret = sched_dl_global_validate();
 		if (ret)
 			goto undo;
 
-		ret = sched_dl_global_constraints();
+		ret = sched_rt_global_constraints();
 		if (ret)
 			goto undo;
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3fa8fa6..5e95145 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -69,7 +69,7 @@
 	dl_b->total_bw = 0;
 }
 
-void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
+void init_dl_rq(struct dl_rq *dl_rq)
 {
 	dl_rq->rb_root = RB_ROOT;
 
@@ -218,6 +218,52 @@
 	rq->post_schedule = has_pushable_dl_tasks(rq);
 }
 
+static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
+
+static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
+{
+	struct rq *later_rq = NULL;
+	bool fallback = false;
+
+	later_rq = find_lock_later_rq(p, rq);
+
+	if (!later_rq) {
+		int cpu;
+
+		/*
+		 * If we cannot preempt any rq, fall back to pick any
+		 * online cpu.
+		 */
+		fallback = true;
+		cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
+		if (cpu >= nr_cpu_ids) {
+			/*
+			 * Fail to find any suitable cpu.
+			 * The task will never come back!
+			 */
+			BUG_ON(dl_bandwidth_enabled());
+
+			/*
+			 * If admission control is disabled we
+			 * try a little harder to let the task
+			 * run.
+			 */
+			cpu = cpumask_any(cpu_active_mask);
+		}
+		later_rq = cpu_rq(cpu);
+		double_lock_balance(rq, later_rq);
+	}
+
+	deactivate_task(rq, p, 0);
+	set_task_cpu(p, later_rq->cpu);
+	activate_task(later_rq, p, ENQUEUE_REPLENISH);
+
+	if (!fallback)
+		resched_curr(later_rq);
+
+	double_unlock_balance(rq, later_rq);
+}
+
 #else
 
 static inline
@@ -514,7 +560,7 @@
 	unsigned long flags;
 	struct rq *rq;
 
-	rq = task_rq_lock(current, &flags);
+	rq = task_rq_lock(p, &flags);
 
 	/*
 	 * We need to take care of several possible races here:
@@ -536,6 +582,17 @@
 	sched_clock_tick();
 	update_rq_clock(rq);
 
+#ifdef CONFIG_SMP
+	/*
+	 * If we find that the rq the task was on is no longer
+	 * available, we need to select a new rq.
+	 */
+	if (unlikely(!rq->online)) {
+		dl_task_offline_migration(rq, p);
+		goto unlock;
+	}
+#endif
+
 	/*
 	 * If the throttle happened during sched-out; like:
 	 *
@@ -569,7 +626,7 @@
 		push_dl_task(rq);
 #endif
 unlock:
-	task_rq_unlock(rq, current, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return HRTIMER_NORESTART;
 }
@@ -914,6 +971,12 @@
 	}
 	update_rq_clock(rq);
 	update_curr_dl(rq);
+	/*
+	 * Tell update_rq_clock() that we've just updated,
+	 * so we don't do microscopic update in schedule()
+	 * and double the fastpath cost.
+	 */
+	rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1659,14 +1722,6 @@
 {
 	int check_resched = 1;
 
-	/*
-	 * If p is throttled, don't consider the possibility
-	 * of preempting rq->curr, the check will be done right
-	 * after its runtime will get replenished.
-	 */
-	if (unlikely(p->dl.dl_throttled))
-		return;
-
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8baaf85..a245c1f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -71,7 +71,7 @@
 	if (!se) {
 		struct sched_avg *avg = &cpu_rq(cpu)->avg;
 		P(avg->runnable_avg_sum);
-		P(avg->runnable_avg_period);
+		P(avg->avg_period);
 		return;
 	}
 
@@ -94,8 +94,10 @@
 	P(se->load.weight);
 #ifdef CONFIG_SMP
 	P(se->avg.runnable_avg_sum);
-	P(se->avg.runnable_avg_period);
+	P(se->avg.running_avg_sum);
+	P(se->avg.avg_period);
 	P(se->avg.load_avg_contrib);
+	P(se->avg.utilization_avg_contrib);
 	P(se->avg.decay_count);
 #endif
 #undef PN
@@ -214,6 +216,8 @@
 			cfs_rq->runnable_load_avg);
 	SEQ_printf(m, "  .%-30s: %ld\n", "blocked_load_avg",
 			cfs_rq->blocked_load_avg);
+	SEQ_printf(m, "  .%-30s: %ld\n", "utilization_load_avg",
+			cfs_rq->utilization_load_avg);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_contrib",
 			cfs_rq->tg_load_contrib);
@@ -636,8 +640,10 @@
 	P(se.load.weight);
 #ifdef CONFIG_SMP
 	P(se.avg.runnable_avg_sum);
-	P(se.avg.runnable_avg_period);
+	P(se.avg.running_avg_sum);
+	P(se.avg.avg_period);
 	P(se.avg.load_avg_contrib);
+	P(se.avg.utilization_avg_contrib);
 	P(se.avg.decay_count);
 #endif
 	P(policy);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bcfe320..ffeaa41 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -670,6 +670,7 @@
 static unsigned long task_h_load(struct task_struct *p);
 
 static inline void __update_task_entity_contrib(struct sched_entity *se);
+static inline void __update_task_entity_utilization(struct sched_entity *se);
 
 /* Give new task start runnable values to heavy its load in infant time */
 void init_task_runnable_average(struct task_struct *p)
@@ -677,9 +678,10 @@
 	u32 slice;
 
 	slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
-	p->se.avg.runnable_avg_sum = slice;
-	p->se.avg.runnable_avg_period = slice;
+	p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice;
+	p->se.avg.avg_period = slice;
 	__update_task_entity_contrib(&p->se);
+	__update_task_entity_utilization(&p->se);
 }
 #else
 void init_task_runnable_average(struct task_struct *p)
@@ -1196,9 +1198,11 @@
 static bool load_too_imbalanced(long src_load, long dst_load,
 				struct task_numa_env *env)
 {
-	long imb, old_imb;
-	long orig_src_load, orig_dst_load;
 	long src_capacity, dst_capacity;
+	long orig_src_load;
+	long load_a, load_b;
+	long moved_load;
+	long imb;
 
 	/*
 	 * The load is corrected for the CPU capacity available on each node.
@@ -1211,30 +1215,39 @@
 	dst_capacity = env->dst_stats.compute_capacity;
 
 	/* We care about the slope of the imbalance, not the direction. */
-	if (dst_load < src_load)
-		swap(dst_load, src_load);
+	load_a = dst_load;
+	load_b = src_load;
+	if (load_a < load_b)
+		swap(load_a, load_b);
 
 	/* Is the difference below the threshold? */
-	imb = dst_load * src_capacity * 100 -
-	      src_load * dst_capacity * env->imbalance_pct;
+	imb = load_a * src_capacity * 100 -
+		load_b * dst_capacity * env->imbalance_pct;
 	if (imb <= 0)
 		return false;
 
 	/*
 	 * The imbalance is above the allowed threshold.
-	 * Compare it with the old imbalance.
+	 * Allow a move that brings us closer to a balanced situation,
+	 * without moving things past the point of balance.
 	 */
 	orig_src_load = env->src_stats.load;
-	orig_dst_load = env->dst_stats.load;
 
-	if (orig_dst_load < orig_src_load)
-		swap(orig_dst_load, orig_src_load);
+	/*
+	 * In a task swap, there will be one load moving from src to dst,
+	 * and another moving back. This is the net sum of both moves.
+	 * A simple task move will always have a positive value.
+	 * Allow the move if it brings the system closer to a balanced
+	 * situation, without crossing over the balance point.
+	 */
+	moved_load = orig_src_load - src_load;
 
-	old_imb = orig_dst_load * src_capacity * 100 -
-		  orig_src_load * dst_capacity * env->imbalance_pct;
-
-	/* Would this change make things worse? */
-	return (imb > old_imb);
+	if (moved_load > 0)
+		/* Moving src -> dst. Did we overshoot balance? */
+		return src_load * dst_capacity < dst_load * src_capacity;
+	else
+		/* Moving dst -> src. Did we overshoot balance? */
+		return dst_load * src_capacity < src_load * dst_capacity;
 }
 
 /*
@@ -1675,7 +1688,7 @@
 		*period = now - p->last_task_numa_placement;
 	} else {
 		delta = p->se.avg.runnable_avg_sum;
-		*period = p->se.avg.runnable_avg_period;
+		*period = p->se.avg.avg_period;
 	}
 
 	p->last_sum_exec_runtime = runtime;
@@ -1765,6 +1778,8 @@
 			}
 		}
 		/* Next round, evaluate the nodes within max_group. */
+		if (!max_faults)
+			break;
 		nodes = max_group;
 	}
 	return nid;
@@ -2165,8 +2180,10 @@
 		vma = mm->mmap;
 	}
 	for (; vma; vma = vma->vm_next) {
-		if (!vma_migratable(vma) || !vma_policy_mof(vma))
+		if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
+			is_vm_hugetlb_page(vma)) {
 			continue;
+		}
 
 		/*
 		 * Shared library pages mapped by multiple processes are not
@@ -2501,13 +2518,15 @@
  *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
-static __always_inline int __update_entity_runnable_avg(u64 now,
+static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
 							struct sched_avg *sa,
-							int runnable)
+							int runnable,
+							int running)
 {
 	u64 delta, periods;
 	u32 runnable_contrib;
 	int delta_w, decayed = 0;
+	unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
 
 	delta = now - sa->last_runnable_update;
 	/*
@@ -2529,7 +2548,7 @@
 	sa->last_runnable_update = now;
 
 	/* delta_w is the amount already accumulated against our next period */
-	delta_w = sa->runnable_avg_period % 1024;
+	delta_w = sa->avg_period % 1024;
 	if (delta + delta_w >= 1024) {
 		/* period roll-over */
 		decayed = 1;
@@ -2542,7 +2561,10 @@
 		delta_w = 1024 - delta_w;
 		if (runnable)
 			sa->runnable_avg_sum += delta_w;
-		sa->runnable_avg_period += delta_w;
+		if (running)
+			sa->running_avg_sum += delta_w * scale_freq
+				>> SCHED_CAPACITY_SHIFT;
+		sa->avg_period += delta_w;
 
 		delta -= delta_w;
 
@@ -2552,20 +2574,28 @@
 
 		sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
 						  periods + 1);
-		sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
+		sa->running_avg_sum = decay_load(sa->running_avg_sum,
+						  periods + 1);
+		sa->avg_period = decay_load(sa->avg_period,
 						     periods + 1);
 
 		/* Efficiently calculate \sum (1..n_period) 1024*y^i */
 		runnable_contrib = __compute_runnable_contrib(periods);
 		if (runnable)
 			sa->runnable_avg_sum += runnable_contrib;
-		sa->runnable_avg_period += runnable_contrib;
+		if (running)
+			sa->running_avg_sum += runnable_contrib * scale_freq
+				>> SCHED_CAPACITY_SHIFT;
+		sa->avg_period += runnable_contrib;
 	}
 
 	/* Remainder of delta accrued against u_0` */
 	if (runnable)
 		sa->runnable_avg_sum += delta;
-	sa->runnable_avg_period += delta;
+	if (running)
+		sa->running_avg_sum += delta * scale_freq
+			>> SCHED_CAPACITY_SHIFT;
+	sa->avg_period += delta;
 
 	return decayed;
 }
@@ -2582,6 +2612,8 @@
 		return 0;
 
 	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+	se->avg.utilization_avg_contrib =
+		decay_load(se->avg.utilization_avg_contrib, decays);
 
 	return decays;
 }
@@ -2617,7 +2649,7 @@
 
 	/* The fraction of a cpu used by this cfs_rq */
 	contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
-			  sa->runnable_avg_period + 1);
+			  sa->avg_period + 1);
 	contrib -= cfs_rq->tg_runnable_contrib;
 
 	if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
@@ -2670,7 +2702,8 @@
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
-	__update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable);
+	__update_entity_runnable_avg(rq_clock_task(rq), cpu_of(rq), &rq->avg,
+			runnable, runnable);
 	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -2688,7 +2721,7 @@
 
 	/* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
 	contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
-	contrib /= (se->avg.runnable_avg_period + 1);
+	contrib /= (se->avg.avg_period + 1);
 	se->avg.load_avg_contrib = scale_load(contrib);
 }
 
@@ -2707,6 +2740,30 @@
 	return se->avg.load_avg_contrib - old_contrib;
 }
 
+
+static inline void __update_task_entity_utilization(struct sched_entity *se)
+{
+	u32 contrib;
+
+	/* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
+	contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE);
+	contrib /= (se->avg.avg_period + 1);
+	se->avg.utilization_avg_contrib = scale_load(contrib);
+}
+
+static long __update_entity_utilization_avg_contrib(struct sched_entity *se)
+{
+	long old_contrib = se->avg.utilization_avg_contrib;
+
+	if (entity_is_task(se))
+		__update_task_entity_utilization(se);
+	else
+		se->avg.utilization_avg_contrib =
+					group_cfs_rq(se)->utilization_load_avg;
+
+	return se->avg.utilization_avg_contrib - old_contrib;
+}
+
 static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
 						 long load_contrib)
 {
@@ -2723,7 +2780,8 @@
 					  int update_cfs_rq)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	long contrib_delta;
+	long contrib_delta, utilization_delta;
+	int cpu = cpu_of(rq_of(cfs_rq));
 	u64 now;
 
 	/*
@@ -2735,18 +2793,22 @@
 	else
 		now = cfs_rq_clock_task(group_cfs_rq(se));
 
-	if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
+	if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq,
+					cfs_rq->curr == se))
 		return;
 
 	contrib_delta = __update_entity_load_avg_contrib(se);
+	utilization_delta = __update_entity_utilization_avg_contrib(se);
 
 	if (!update_cfs_rq)
 		return;
 
-	if (se->on_rq)
+	if (se->on_rq) {
 		cfs_rq->runnable_load_avg += contrib_delta;
-	else
+		cfs_rq->utilization_load_avg += utilization_delta;
+	} else {
 		subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+	}
 }
 
 /*
@@ -2821,6 +2883,7 @@
 	}
 
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+	cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib;
 	/* we force update consideration on load-balancer moves */
 	update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
@@ -2839,6 +2902,7 @@
 	update_cfs_rq_blocked_load(cfs_rq, !sleep);
 
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+	cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
 	if (sleep) {
 		cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
 		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -3176,6 +3240,7 @@
 		 */
 		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
+		update_entity_load_avg(se, 1);
 	}
 
 	update_stats_curr_start(cfs_rq, se);
@@ -4302,6 +4367,11 @@
 	return cpu_rq(cpu)->cpu_capacity;
 }
 
+static unsigned long capacity_orig_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -4715,6 +4785,33 @@
 done:
 	return target;
 }
+/*
+ * get_cpu_usage returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the usage with the capacity of the CPU that is available for CFS
+ * task (ie cpu_capacity).
+ * cfs.utilization_load_avg is the sum of running time of runnable tasks on a
+ * CPU. It represents the amount of utilization of a CPU in the range
+ * [0..SCHED_LOAD_SCALE].  The usage of a CPU can't be higher than the full
+ * capacity of the CPU because it's about the running time on this CPU.
+ * Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE
+ * because of unfortunate rounding in avg_period and running_load_avg or just
+ * after migrating tasks until the average stabilizes with the new running
+ * time. So we need to check that the usage stays into the range
+ * [0..cpu_capacity_orig] and cap if necessary.
+ * Without capping the usage, a group could be seen as overloaded (CPU0 usage
+ * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity
+ */
+static int get_cpu_usage(int cpu)
+{
+	unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
+	unsigned long capacity = capacity_orig_of(cpu);
+
+	if (usage >= SCHED_LOAD_SCALE)
+		return capacity;
+
+	return (usage * capacity) >> SCHED_LOAD_SHIFT;
+}
 
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
@@ -5841,12 +5938,12 @@
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
 	unsigned long group_capacity;
+	unsigned long group_usage; /* Total usage of the group */
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
-	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
 	unsigned int group_weight;
 	enum group_type group_type;
-	int group_has_free_capacity;
+	int group_no_capacity;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -5917,16 +6014,6 @@
 	return load_idx;
 }
 
-static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
-{
-	return SCHED_CAPACITY_SCALE;
-}
-
-unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
-{
-	return default_scale_capacity(sd, cpu);
-}
-
 static unsigned long default_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
@@ -5943,7 +6030,7 @@
 static unsigned long scale_rt_capacity(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	u64 total, available, age_stamp, avg;
+	u64 total, used, age_stamp, avg;
 	s64 delta;
 
 	/*
@@ -5959,19 +6046,12 @@
 
 	total = sched_avg_period() + delta;
 
-	if (unlikely(total < avg)) {
-		/* Ensures that capacity won't end up being negative */
-		available = 0;
-	} else {
-		available = total - avg;
-	}
+	used = div_u64(avg, total);
 
-	if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
-		total = SCHED_CAPACITY_SCALE;
+	if (likely(used < SCHED_CAPACITY_SCALE))
+		return SCHED_CAPACITY_SCALE - used;
 
-	total >>= SCHED_CAPACITY_SHIFT;
-
-	return div_u64(available, total);
+	return 1;
 }
 
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
@@ -5986,14 +6066,7 @@
 
 	capacity >>= SCHED_CAPACITY_SHIFT;
 
-	sdg->sgc->capacity_orig = capacity;
-
-	if (sched_feat(ARCH_CAPACITY))
-		capacity *= arch_scale_freq_capacity(sd, cpu);
-	else
-		capacity *= default_scale_capacity(sd, cpu);
-
-	capacity >>= SCHED_CAPACITY_SHIFT;
+	cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
 	capacity *= scale_rt_capacity(cpu);
 	capacity >>= SCHED_CAPACITY_SHIFT;
@@ -6009,7 +6082,7 @@
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long capacity, capacity_orig;
+	unsigned long capacity;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
@@ -6021,7 +6094,7 @@
 		return;
 	}
 
-	capacity_orig = capacity = 0;
+	capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -6041,19 +6114,15 @@
 			 * Use capacity_of(), which is set irrespective of domains
 			 * in update_cpu_capacity().
 			 *
-			 * This avoids capacity/capacity_orig from being 0 and
+			 * This avoids capacity from being 0 and
 			 * causing divide-by-zero issues on boot.
-			 *
-			 * Runtime updates will correct capacity_orig.
 			 */
 			if (unlikely(!rq->sd)) {
-				capacity_orig += capacity_of(cpu);
 				capacity += capacity_of(cpu);
 				continue;
 			}
 
 			sgc = rq->sd->groups->sgc;
-			capacity_orig += sgc->capacity_orig;
 			capacity += sgc->capacity;
 		}
 	} else  {
@@ -6064,39 +6133,24 @@
 
 		group = child->groups;
 		do {
-			capacity_orig += group->sgc->capacity_orig;
 			capacity += group->sgc->capacity;
 			group = group->next;
 		} while (group != child->groups);
 	}
 
-	sdg->sgc->capacity_orig = capacity_orig;
 	sdg->sgc->capacity = capacity;
 }
 
 /*
- * Try and fix up capacity for tiny siblings, this is needed when
- * things like SD_ASYM_PACKING need f_b_g to select another sibling
- * which on its own isn't powerful enough.
- *
- * See update_sd_pick_busiest() and check_asym_packing().
+ * Check whether the capacity of the rq has been noticeably reduced by side
+ * activity. The imbalance_pct is used for the threshold.
+ * Return true is the capacity is reduced
  */
 static inline int
-fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 {
-	/*
-	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
-	 */
-	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
-		return 0;
-
-	/*
-	 * If ~90% of the cpu_capacity is still there, we're good.
-	 */
-	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
-		return 1;
-
-	return 0;
+	return ((rq->cpu_capacity * sd->imbalance_pct) <
+				(rq->cpu_capacity_orig * 100));
 }
 
 /*
@@ -6134,37 +6188,56 @@
 }
 
 /*
- * Compute the group capacity factor.
- *
- * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
- * first dividing out the smt factor and computing the actual number of cores
- * and limit unit capacity with that.
+ * group_has_capacity returns true if the group has spare capacity that could
+ * be used by some tasks.
+ * We consider that a group has spare capacity if the  * number of task is
+ * smaller than the number of CPUs or if the usage is lower than the available
+ * capacity for CFS tasks.
+ * For the latter, we use a threshold to stabilize the state, to take into
+ * account the variance of the tasks' load and to return true if the available
+ * capacity in meaningful for the load balancer.
+ * As an example, an available capacity of 1% can appear but it doesn't make
+ * any benefit for the load balance.
  */
-static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
+static inline bool
+group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	unsigned int capacity_factor, smt, cpus;
-	unsigned int capacity, capacity_orig;
+	if (sgs->sum_nr_running < sgs->group_weight)
+		return true;
 
-	capacity = group->sgc->capacity;
-	capacity_orig = group->sgc->capacity_orig;
-	cpus = group->group_weight;
+	if ((sgs->group_capacity * 100) >
+			(sgs->group_usage * env->sd->imbalance_pct))
+		return true;
 
-	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
-	capacity_factor = cpus / smt; /* cores */
-
-	capacity_factor = min_t(unsigned,
-		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
-	if (!capacity_factor)
-		capacity_factor = fix_small_capacity(env->sd, group);
-
-	return capacity_factor;
+	return false;
 }
 
-static enum group_type
-group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+/*
+ *  group_is_overloaded returns true if the group has more tasks than it can
+ *  handle.
+ *  group_is_overloaded is not equals to !group_has_capacity because a group
+ *  with the exact right number of tasks, has no more spare capacity but is not
+ *  overloaded so both group_has_capacity and group_is_overloaded return
+ *  false.
+ */
+static inline bool
+group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_nr_running > sgs->group_capacity_factor)
+	if (sgs->sum_nr_running <= sgs->group_weight)
+		return false;
+
+	if ((sgs->group_capacity * 100) <
+			(sgs->group_usage * env->sd->imbalance_pct))
+		return true;
+
+	return false;
+}
+
+static enum group_type group_classify(struct lb_env *env,
+		struct sched_group *group,
+		struct sg_lb_stats *sgs)
+{
+	if (sgs->group_no_capacity)
 		return group_overloaded;
 
 	if (sg_imbalanced(group))
@@ -6202,6 +6275,7 @@
 			load = source_load(i, load_idx);
 
 		sgs->group_load += load;
+		sgs->group_usage += get_cpu_usage(i);
 		sgs->sum_nr_running += rq->cfs.h_nr_running;
 
 		if (rq->nr_running > 1)
@@ -6224,11 +6298,9 @@
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
 	sgs->group_weight = group->group_weight;
-	sgs->group_capacity_factor = sg_capacity_factor(env, group);
-	sgs->group_type = group_classify(group, sgs);
 
-	if (sgs->group_capacity_factor > sgs->sum_nr_running)
-		sgs->group_has_free_capacity = 1;
+	sgs->group_no_capacity = group_is_overloaded(env, sgs);
+	sgs->group_type = group_classify(env, group, sgs);
 }
 
 /**
@@ -6350,18 +6422,19 @@
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
-		 * first, lower the sg capacity factor to one so that we'll try
+		 * first, lower the sg capacity so that we'll try
 		 * and move all the excess tasks away. We lower the capacity
 		 * of a group only if the local group has the capacity to fit
-		 * these excess tasks, i.e. nr_running < group_capacity_factor. The
-		 * extra check prevents the case where you always pull from the
-		 * heaviest group when it is already under-utilized (possible
-		 * with a large weight task outweighs the tasks on the system).
+		 * these excess tasks. The extra check prevents the case where
+		 * you always pull from the heaviest group when it is already
+		 * under-utilized (possible with a large weight task outweighs
+		 * the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    sds->local_stat.group_has_free_capacity) {
-			sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
-			sgs->group_type = group_classify(sg, sgs);
+		    group_has_capacity(env, &sds->local_stat) &&
+		    (sgs->sum_nr_running > 1)) {
+			sgs->group_no_capacity = 1;
+			sgs->group_type = group_overloaded;
 		}
 
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
@@ -6541,11 +6614,12 @@
 	 */
 	if (busiest->group_type == group_overloaded &&
 	    local->group_type   == group_overloaded) {
-		load_above_capacity =
-			(busiest->sum_nr_running - busiest->group_capacity_factor);
-
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
-		load_above_capacity /= busiest->group_capacity;
+		load_above_capacity = busiest->sum_nr_running *
+					SCHED_LOAD_SCALE;
+		if (load_above_capacity > busiest->group_capacity)
+			load_above_capacity -= busiest->group_capacity;
+		else
+			load_above_capacity = ~0UL;
 	}
 
 	/*
@@ -6608,6 +6682,7 @@
 	local = &sds.local_stat;
 	busiest = &sds.busiest_stat;
 
+	/* ASYM feature bypasses nice load balance check */
 	if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
 	    check_asym_packing(env, &sds))
 		return sds.busiest;
@@ -6628,8 +6703,8 @@
 		goto force_balance;
 
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-	if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
-	    !busiest->group_has_free_capacity)
+	if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) &&
+	    busiest->group_no_capacity)
 		goto force_balance;
 
 	/*
@@ -6688,7 +6763,7 @@
 	int i;
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-		unsigned long capacity, capacity_factor, wl;
+		unsigned long capacity, wl;
 		enum fbq_type rt;
 
 		rq = cpu_rq(i);
@@ -6717,9 +6792,6 @@
 			continue;
 
 		capacity = capacity_of(i);
-		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
-		if (!capacity_factor)
-			capacity_factor = fix_small_capacity(env->sd, group);
 
 		wl = weighted_cpuload(i);
 
@@ -6727,7 +6799,9 @@
 		 * When comparing with imbalance, use weighted_cpuload()
 		 * which is not scaled with the cpu capacity.
 		 */
-		if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
+
+		if (rq->nr_running == 1 && wl > env->imbalance &&
+		    !check_cpu_capacity(rq, env->sd))
 			continue;
 
 		/*
@@ -6775,6 +6849,19 @@
 			return 1;
 	}
 
+	/*
+	 * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
+	 * It's worth migrating the task if the src_cpu's capacity is reduced
+	 * because of other sched_class or IRQs if more capacity stays
+	 * available on dst_cpu.
+	 */
+	if ((env->idle != CPU_NOT_IDLE) &&
+	    (env->src_rq->cfs.h_nr_running == 1)) {
+		if ((check_cpu_capacity(env->src_rq, sd)) &&
+		    (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
+			return 1;
+	}
+
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -6874,6 +6961,9 @@
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
+	env.src_cpu = busiest->cpu;
+	env.src_rq = busiest;
+
 	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/*
@@ -6883,8 +6973,6 @@
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
-		env.src_cpu   = busiest->cpu;
-		env.src_rq    = busiest;
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
@@ -7584,22 +7672,25 @@
 
 /*
  * Current heuristic for kicking the idle load balancer in the presence
- * of an idle cpu is the system.
+ * of an idle cpu in the system.
  *   - This rq has more than one task.
- *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's capacity.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpu.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline int nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
 	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
+	bool kick = false;
 
 	if (unlikely(rq->idle_balance))
-		return 0;
+		return false;
 
        /*
 	* We may be recently in ticked or tickless idle mode. At the first
@@ -7613,38 +7704,46 @@
 	 * balancing.
 	 */
 	if (likely(!atomic_read(&nohz.nr_cpus)))
-		return 0;
+		return false;
 
 	if (time_before(now, nohz.next_balance))
-		return 0;
+		return false;
 
 	if (rq->nr_running >= 2)
-		goto need_kick;
+		return true;
 
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
-
 	if (sd) {
 		sgc = sd->groups->sgc;
 		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
-		if (nr_busy > 1)
-			goto need_kick_unlock;
+		if (nr_busy > 1) {
+			kick = true;
+			goto unlock;
+		}
+
+	}
+
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		if ((rq->cfs.h_nr_running >= 1) &&
+				check_cpu_capacity(rq, sd)) {
+			kick = true;
+			goto unlock;
+		}
 	}
 
 	sd = rcu_dereference(per_cpu(sd_asym, cpu));
-
 	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-				  sched_domain_span(sd)) < cpu))
-		goto need_kick_unlock;
+				  sched_domain_span(sd)) < cpu)) {
+		kick = true;
+		goto unlock;
+	}
 
+unlock:
 	rcu_read_unlock();
-	return 0;
-
-need_kick_unlock:
-	rcu_read_unlock();
-need_kick:
-	return 1;
+	return kick;
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
@@ -7660,14 +7759,16 @@
 	enum cpu_idle_type idle = this_rq->idle_balance ?
 						CPU_IDLE : CPU_NOT_IDLE;
 
-	rebalance_domains(this_rq, idle);
-
 	/*
 	 * If this cpu has a pending nohz_balance_kick, then do the
 	 * balancing on behalf of the other idle cpus whose ticks are
-	 * stopped.
+	 * stopped. Do nohz_idle_balance *before* rebalance_domains to
+	 * give the idle cpus a chance to load balance. Else we may
+	 * load balance only within the local sched_domain hierarchy
+	 * and abort nohz_idle_balance altogether if we pull some load.
 	 */
 	nohz_idle_balance(this_rq, idle);
+	rebalance_domains(this_rq, idle);
 }
 
 /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 90284d1..91e33cd 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -56,6 +56,19 @@
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * In order to avoid a thundering herd attack of CPUs that are
+ * lowering their priorities at the same time, and there being
+ * a single CPU that has an RT task that can migrate and is waiting
+ * to run, where the other CPUs will try to take that CPUs
+ * rq lock and possibly create a large contention, sending an
+ * IPI to that CPU and let that CPU push the RT task to where
+ * it should go may be a better scenario.
+ */
+SCHED_FEAT(RT_PUSH_IPI, true)
+#endif
+
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 80014a1..4d207d2 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -158,8 +158,7 @@
 	 * is used from another cpu as a broadcast timer, this call may
 	 * fail if it is not available
 	 */
-	if (broadcast &&
-	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
+	if (broadcast && tick_broadcast_enter())
 		goto use_default;
 
 	/* Take note of the planned idle state. */
@@ -176,7 +175,7 @@
 	idle_set_state(this_rq(), NULL);
 
 	if (broadcast)
-		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+		tick_broadcast_exit();
 
 	/*
 	 * Give the governor an opportunity to reflect on the outcome
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f4d4b07..575da76 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -6,6 +6,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <linux/irq_work.h>
 
 int sched_rr_timeslice = RR_TIMESLICE;
 
@@ -59,7 +60,11 @@
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+#ifdef CONFIG_SMP
+static void push_irq_work_func(struct irq_work *work);
+#endif
+
+void init_rt_rq(struct rt_rq *rt_rq)
 {
 	struct rt_prio_array *array;
 	int i;
@@ -78,7 +83,14 @@
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
 	plist_head_init(&rt_rq->pushable_tasks);
+
+#ifdef HAVE_RT_PUSH_IPI
+	rt_rq->push_flags = 0;
+	rt_rq->push_cpu = nr_cpu_ids;
+	raw_spin_lock_init(&rt_rq->push_lock);
+	init_irq_work(&rt_rq->push_work, push_irq_work_func);
 #endif
+#endif /* CONFIG_SMP */
 	/* We start is dequeued state, because no RT tasks are queued */
 	rt_rq->rt_queued = 0;
 
@@ -193,7 +205,7 @@
 		if (!rt_se)
 			goto err_free_rq;
 
-		init_rt_rq(rt_rq, cpu_rq(i));
+		init_rt_rq(rt_rq);
 		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 	}
@@ -1778,6 +1790,164 @@
 		;
 }
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * The search for the next cpu always starts at rq->cpu and ends
+ * when we reach rq->cpu again. It will never return rq->cpu.
+ * This returns the next cpu to check, or nr_cpu_ids if the loop
+ * is complete.
+ *
+ * rq->rt.push_cpu holds the last cpu returned by this function,
+ * or if this is the first instance, it must hold rq->cpu.
+ */
+static int rto_next_cpu(struct rq *rq)
+{
+	int prev_cpu = rq->rt.push_cpu;
+	int cpu;
+
+	cpu = cpumask_next(prev_cpu, rq->rd->rto_mask);
+
+	/*
+	 * If the previous cpu is less than the rq's CPU, then it already
+	 * passed the end of the mask, and has started from the beginning.
+	 * We end if the next CPU is greater or equal to rq's CPU.
+	 */
+	if (prev_cpu < rq->cpu) {
+		if (cpu >= rq->cpu)
+			return nr_cpu_ids;
+
+	} else if (cpu >= nr_cpu_ids) {
+		/*
+		 * We passed the end of the mask, start at the beginning.
+		 * If the result is greater or equal to the rq's CPU, then
+		 * the loop is finished.
+		 */
+		cpu = cpumask_first(rq->rd->rto_mask);
+		if (cpu >= rq->cpu)
+			return nr_cpu_ids;
+	}
+	rq->rt.push_cpu = cpu;
+
+	/* Return cpu to let the caller know if the loop is finished or not */
+	return cpu;
+}
+
+static int find_next_push_cpu(struct rq *rq)
+{
+	struct rq *next_rq;
+	int cpu;
+
+	while (1) {
+		cpu = rto_next_cpu(rq);
+		if (cpu >= nr_cpu_ids)
+			break;
+		next_rq = cpu_rq(cpu);
+
+		/* Make sure the next rq can push to this rq */
+		if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr)
+			break;
+	}
+
+	return cpu;
+}
+
+#define RT_PUSH_IPI_EXECUTING		1
+#define RT_PUSH_IPI_RESTART		2
+
+static void tell_cpu_to_push(struct rq *rq)
+{
+	int cpu;
+
+	if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+		raw_spin_lock(&rq->rt.push_lock);
+		/* Make sure it's still executing */
+		if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+			/*
+			 * Tell the IPI to restart the loop as things have
+			 * changed since it started.
+			 */
+			rq->rt.push_flags |= RT_PUSH_IPI_RESTART;
+			raw_spin_unlock(&rq->rt.push_lock);
+			return;
+		}
+		raw_spin_unlock(&rq->rt.push_lock);
+	}
+
+	/* When here, there's no IPI going around */
+
+	rq->rt.push_cpu = rq->cpu;
+	cpu = find_next_push_cpu(rq);
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	rq->rt.push_flags = RT_PUSH_IPI_EXECUTING;
+
+	irq_work_queue_on(&rq->rt.push_work, cpu);
+}
+
+/* Called from hardirq context */
+static void try_to_push_tasks(void *arg)
+{
+	struct rt_rq *rt_rq = arg;
+	struct rq *rq, *src_rq;
+	int this_cpu;
+	int cpu;
+
+	this_cpu = rt_rq->push_cpu;
+
+	/* Paranoid check */
+	BUG_ON(this_cpu != smp_processor_id());
+
+	rq = cpu_rq(this_cpu);
+	src_rq = rq_of_rt_rq(rt_rq);
+
+again:
+	if (has_pushable_tasks(rq)) {
+		raw_spin_lock(&rq->lock);
+		push_rt_task(rq);
+		raw_spin_unlock(&rq->lock);
+	}
+
+	/* Pass the IPI to the next rt overloaded queue */
+	raw_spin_lock(&rt_rq->push_lock);
+	/*
+	 * If the source queue changed since the IPI went out,
+	 * we need to restart the search from that CPU again.
+	 */
+	if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) {
+		rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
+		rt_rq->push_cpu = src_rq->cpu;
+	}
+
+	cpu = find_next_push_cpu(src_rq);
+
+	if (cpu >= nr_cpu_ids)
+		rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
+	raw_spin_unlock(&rt_rq->push_lock);
+
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	/*
+	 * It is possible that a restart caused this CPU to be
+	 * chosen again. Don't bother with an IPI, just see if we
+	 * have more to push.
+	 */
+	if (unlikely(cpu == rq->cpu))
+		goto again;
+
+	/* Try the next RT overloaded CPU */
+	irq_work_queue_on(&rt_rq->push_work, cpu);
+}
+
+static void push_irq_work_func(struct irq_work *work)
+{
+	struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work);
+
+	try_to_push_tasks(rt_rq);
+}
+#endif /* HAVE_RT_PUSH_IPI */
+
 static int pull_rt_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, ret = 0, cpu;
@@ -1793,6 +1963,13 @@
 	 */
 	smp_rmb();
 
+#ifdef HAVE_RT_PUSH_IPI
+	if (sched_feat(RT_PUSH_IPI)) {
+		tell_cpu_to_push(this_rq);
+		return 0;
+	}
+#endif
+
 	for_each_cpu(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dc0f435..e0e1299 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
 
@@ -362,8 +363,14 @@
 	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
 	 * This allows for the description of both thread and group usage (in
 	 * the FAIR_GROUP_SCHED case).
+	 * runnable_load_avg is the sum of the load_avg_contrib of the
+	 * sched_entities on the rq.
+	 * blocked_load_avg is similar to runnable_load_avg except that its
+	 * the blocked sched_entities on the rq.
+	 * utilization_load_avg is the sum of the average running time of the
+	 * sched_entities on the rq.
 	 */
-	unsigned long runnable_load_avg, blocked_load_avg;
+	unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
 	atomic64_t decay_counter;
 	u64 last_decay;
 	atomic_long_t removed_load;
@@ -418,6 +425,11 @@
 	return sysctl_sched_rt_runtime >= 0;
 }
 
+/* RT IPI pull logic requires IRQ_WORK */
+#ifdef CONFIG_IRQ_WORK
+# define HAVE_RT_PUSH_IPI
+#endif
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
 	struct rt_prio_array active;
@@ -435,7 +447,13 @@
 	unsigned long rt_nr_total;
 	int overloaded;
 	struct plist_head pushable_tasks;
+#ifdef HAVE_RT_PUSH_IPI
+	int push_flags;
+	int push_cpu;
+	struct irq_work push_work;
+	raw_spinlock_t push_lock;
 #endif
+#endif /* CONFIG_SMP */
 	int rt_queued;
 
 	int rt_throttled;
@@ -597,6 +615,7 @@
 	struct sched_domain *sd;
 
 	unsigned long cpu_capacity;
+	unsigned long cpu_capacity_orig;
 
 	unsigned char idle_balance;
 	/* For active balancing */
@@ -807,7 +826,7 @@
 	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
 	 * for a single CPU.
 	 */
-	unsigned int capacity, capacity_orig;
+	unsigned int capacity;
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
 	/*
@@ -1368,9 +1387,18 @@
 
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
-	rq->rt_avg += rt_delta;
+	rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
 	sched_avg_update(rq);
 }
 #else
@@ -1643,8 +1671,8 @@
 extern void print_dl_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
-extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
+extern void init_rt_rq(struct rt_rq *rt_rq);
+extern void init_dl_rq(struct dl_rq *dl_rq);
 
 extern void cfs_bandwidth_usage_inc(void);
 extern void cfs_bandwidth_usage_dec(void);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index d626dc9..579ce1b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -33,12 +33,6 @@
 config GENERIC_CLOCKEVENTS
 	bool
 
-# Migration helper. Builds, but does not invoke
-config GENERIC_CLOCKEVENTS_BUILD
-	bool
-	default y
-	depends on GENERIC_CLOCKEVENTS
-
 # Architecture can handle broadcast in a driver-agnostic way
 config ARCH_HAS_TICK_BROADCAST
 	bool
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index c09c078..01f0312 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -2,15 +2,13 @@
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
 
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o tick-common.o
 ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
  obj-y						+= tick-broadcast.o
  obj-$(CONFIG_TICK_ONESHOT)			+= tick-broadcast-hrtimer.o
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o
-obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
-obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o tick-sched.o
 obj-$(CONFIG_TIMER_STATS)			+= timer_stats.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5544990..25d942d 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,25 +94,76 @@
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
+static int __clockevents_set_state(struct clock_event_device *dev,
+				   enum clock_event_state state)
+{
+	/* Transition with legacy set_mode() callback */
+	if (dev->set_mode) {
+		/* Legacy callback doesn't support new modes */
+		if (state > CLOCK_EVT_STATE_ONESHOT)
+			return -ENOSYS;
+		/*
+		 * 'clock_event_state' and 'clock_event_mode' have 1-to-1
+		 * mapping until *_ONESHOT, and so a simple cast will work.
+		 */
+		dev->set_mode((enum clock_event_mode)state, dev);
+		dev->mode = (enum clock_event_mode)state;
+		return 0;
+	}
+
+	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+		return 0;
+
+	/* Transition with new state-specific callbacks */
+	switch (state) {
+	case CLOCK_EVT_STATE_DETACHED:
+		/*
+		 * This is an internal state, which is guaranteed to go from
+		 * SHUTDOWN to DETACHED. No driver interaction required.
+		 */
+		return 0;
+
+	case CLOCK_EVT_STATE_SHUTDOWN:
+		return dev->set_state_shutdown(dev);
+
+	case CLOCK_EVT_STATE_PERIODIC:
+		/* Core internal bug */
+		if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+			return -ENOSYS;
+		return dev->set_state_periodic(dev);
+
+	case CLOCK_EVT_STATE_ONESHOT:
+		/* Core internal bug */
+		if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+			return -ENOSYS;
+		return dev->set_state_oneshot(dev);
+
+	default:
+		return -ENOSYS;
+	}
+}
+
 /**
- * clockevents_set_mode - set the operating mode of a clock event device
+ * clockevents_set_state - set the operating state of a clock event device
  * @dev:	device to modify
- * @mode:	new mode
+ * @state:	new state
  *
  * Must be called with interrupts disabled !
  */
-void clockevents_set_mode(struct clock_event_device *dev,
-				 enum clock_event_mode mode)
+void clockevents_set_state(struct clock_event_device *dev,
+			   enum clock_event_state state)
 {
-	if (dev->mode != mode) {
-		dev->set_mode(mode, dev);
-		dev->mode = mode;
+	if (dev->state != state) {
+		if (__clockevents_set_state(dev, state))
+			return;
+
+		dev->state = state;
 
 		/*
 		 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
 		 * on it, so fix it up and emit a warning:
 		 */
-		if (mode == CLOCK_EVT_MODE_ONESHOT) {
+		if (state == CLOCK_EVT_STATE_ONESHOT) {
 			if (unlikely(!dev->mult)) {
 				dev->mult = 1;
 				WARN_ON(1);
@@ -127,10 +178,28 @@
  */
 void clockevents_shutdown(struct clock_event_device *dev)
 {
-	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 	dev->next_event.tv64 = KTIME_MAX;
 }
 
+/**
+ * clockevents_tick_resume -	Resume the tick device before using it again
+ * @dev:			device to resume
+ */
+int clockevents_tick_resume(struct clock_event_device *dev)
+{
+	int ret = 0;
+
+	if (dev->set_mode) {
+		dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
+		dev->mode = CLOCK_EVT_MODE_RESUME;
+	} else if (dev->tick_resume) {
+		ret = dev->tick_resume(dev);
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
 /* Limit min_delta to a jiffie */
@@ -183,7 +252,7 @@
 		delta = dev->min_delta_ns;
 		dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-		if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+		if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 			return 0;
 
 		dev->retries++;
@@ -220,7 +289,7 @@
 	delta = dev->min_delta_ns;
 	dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 		return 0;
 
 	dev->retries++;
@@ -252,7 +321,7 @@
 
 	dev->next_event = expires;
 
-	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 		return 0;
 
 	/* Shortcut for clockevent devices that can deal with ktime. */
@@ -297,7 +366,7 @@
 	struct clock_event_device *dev, *newdev = NULL;
 
 	list_for_each_entry(dev, &clockevent_devices, list) {
-		if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+		if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
 			continue;
 
 		if (!tick_check_replacement(newdev, dev))
@@ -323,7 +392,7 @@
 static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
 {
 	/* Fast track. Device is unused */
-	if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+	if (ced->state == CLOCK_EVT_STATE_DETACHED) {
 		list_del_init(&ced->list);
 		return 0;
 	}
@@ -373,6 +442,37 @@
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind);
 
+/* Sanity check of state transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+	/* Legacy set_mode() callback */
+	if (dev->set_mode) {
+		/* We shouldn't be supporting new modes now */
+		WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
+			dev->set_state_shutdown || dev->tick_resume);
+
+		BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+		return 0;
+	}
+
+	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+		return 0;
+
+	/* New state-specific callbacks */
+	if (!dev->set_state_shutdown)
+		return -EINVAL;
+
+	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+	    !dev->set_state_periodic)
+		return -EINVAL;
+
+	if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+	    !dev->set_state_oneshot)
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * clockevents_register_device - register a clock event device
  * @dev:	device to register
@@ -381,7 +481,11 @@
 {
 	unsigned long flags;
 
-	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	BUG_ON(clockevents_sanity_check(dev));
+
+	/* Initialize state to DETACHED */
+	dev->state = CLOCK_EVT_STATE_DETACHED;
+
 	if (!dev->cpumask) {
 		WARN_ON(num_possible_cpus() > 1);
 		dev->cpumask = cpumask_of(smp_processor_id());
@@ -445,11 +549,11 @@
 {
 	clockevents_config(dev, freq);
 
-	if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+	if (dev->state == CLOCK_EVT_STATE_ONESHOT)
 		return clockevents_program_event(dev, dev->next_event, false);
 
-	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
-		dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+	if (dev->state == CLOCK_EVT_STATE_PERIODIC)
+		return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
 
 	return 0;
 }
@@ -491,30 +595,27 @@
  * @old:	device to release (can be NULL)
  * @new:	device to request (can be NULL)
  *
- * Called from the notifier chain. clockevents_lock is held already
+ * Called from various tick functions with clockevents_lock held and
+ * interrupts disabled.
  */
 void clockevents_exchange_device(struct clock_event_device *old,
 				 struct clock_event_device *new)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	/*
 	 * Caller releases a clock event device. We queue it into the
 	 * released list and do a notify add later.
 	 */
 	if (old) {
 		module_put(old->owner);
-		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
+		clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
 		list_del(&old->list);
 		list_add(&old->list, &clockevents_released);
 	}
 
 	if (new) {
-		BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
+		BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
 		clockevents_shutdown(new);
 	}
-	local_irq_restore(flags);
 }
 
 /**
@@ -541,74 +642,40 @@
 			dev->resume(dev);
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_HOTPLUG_CPU
 /**
- * clockevents_notify - notification about relevant events
- * Returns 0 on success, any other value on error
+ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
  */
-int clockevents_notify(unsigned long reason, void *arg)
+void tick_cleanup_dead_cpu(int cpu)
 {
 	struct clock_event_device *dev, *tmp;
 	unsigned long flags;
-	int cpu, ret = 0;
 
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 
-	switch (reason) {
-	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		tick_broadcast_on_off(reason, arg);
-		break;
-
-	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
-	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
-		ret = tick_broadcast_oneshot_control(reason);
-		break;
-
-	case CLOCK_EVT_NOTIFY_CPU_DYING:
-		tick_handover_do_timer(arg);
-		break;
-
-	case CLOCK_EVT_NOTIFY_SUSPEND:
-		tick_suspend();
-		tick_suspend_broadcast();
-		break;
-
-	case CLOCK_EVT_NOTIFY_RESUME:
-		tick_resume();
-		break;
-
-	case CLOCK_EVT_NOTIFY_CPU_DEAD:
-		tick_shutdown_broadcast_oneshot(arg);
-		tick_shutdown_broadcast(arg);
-		tick_shutdown(arg);
-		/*
-		 * Unregister the clock event devices which were
-		 * released from the users in the notify chain.
-		 */
-		list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+	tick_shutdown_broadcast_oneshot(cpu);
+	tick_shutdown_broadcast(cpu);
+	tick_shutdown(cpu);
+	/*
+	 * Unregister the clock event devices which were
+	 * released from the users in the notify chain.
+	 */
+	list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+		list_del(&dev->list);
+	/*
+	 * Now check whether the CPU has left unused per cpu devices
+	 */
+	list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+		if (cpumask_test_cpu(cpu, dev->cpumask) &&
+		    cpumask_weight(dev->cpumask) == 1 &&
+		    !tick_is_broadcast_device(dev)) {
+			BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
 			list_del(&dev->list);
-		/*
-		 * Now check whether the CPU has left unused per cpu devices
-		 */
-		cpu = *((int *)arg);
-		list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
-			if (cpumask_test_cpu(cpu, dev->cpumask) &&
-			    cpumask_weight(dev->cpumask) == 1 &&
-			    !tick_is_broadcast_device(dev)) {
-				BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-				list_del(&dev->list);
-			}
 		}
-		break;
-	default:
-		break;
 	}
 	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
-	return ret;
 }
-EXPORT_SYMBOL_GPL(clockevents_notify);
+#endif
 
 #ifdef CONFIG_SYSFS
 struct bus_type clockevents_subsys = {
@@ -727,5 +794,3 @@
 }
 device_initcall(clockevents_init_sysfs);
 #endif /* SYSFS */
-
-#endif /* GENERIC_CLOCK_EVENTS */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352..15facb1 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@
 		schedule_work(&watchdog_work);
 }
 
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-	       cs->name, delta);
-	__clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:		clocksource to be marked unstable
@@ -174,7 +167,7 @@
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, delta;
+	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
 
@@ -213,6 +206,8 @@
 
 		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+		wdlast = cs->wd_last; /* save these in case we print them */
+		cslast = cs->cs_last;
 		cs->cs_last = csnow;
 		cs->wd_last = wdnow;
 
@@ -221,7 +216,12 @@
 
 		/* Check the deviation from the watchdog clocksource. */
 		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-			clocksource_unstable(cs, cs_nsec - wd_nsec);
+			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+			pr_warn("	'%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+				watchdog->name, wdnow, wdlast, watchdog->mask);
+			pr_warn("	'%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+				cs->name, csnow, cslast, cs->mask);
+			__clocksource_unstable(cs);
 			continue;
 		}
 
@@ -469,26 +469,25 @@
  * @shift:	cycle to nanosecond divisor (power of two)
  * @maxadj:	maximum adjustment value to mult (~11%)
  * @mask:	bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:	maximum cycle value before potential overflow (does not include
+ *		any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, in other words, we
+ * return half the number of nanoseconds the hardware counter can technically
+ * cover. This is done so that we can potentially detect problems caused by
+ * delayed timers or bad hardware, which might result in time intervals that
+ * are larger then what the math used can handle without overflows.
  */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
-	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(mult + maxadj)
-	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-	 * max_cycles < 2^(63 - log2(mult + maxadj))
-	 * max_cycles < 1 << (63 - log2(mult + maxadj))
-	 * Please note that we add 1 to the result of the log2 to account for
-	 * any rounding errors, ensure the above inequality is satisfied and
-	 * no overflow will occur.
+	 * cyc2ns() function without overflowing a 64-bit result.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+	max_cycles = ULLONG_MAX;
+	do_div(max_cycles, mult+maxadj);
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +498,26 @@
 	max_cycles = min(max_cycles, mask);
 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 
+	/* return the max_cycles value as well if requested */
+	if (max_cyc)
+		*max_cyc = max_cycles;
+
+	/* Return 50% of the actual maximum, so we can detect bad values */
+	max_nsecs >>= 1;
+
 	return max_nsecs;
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:         Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-	u64 max_nsecs;
-
-	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-					  cs->mask);
-	/*
-	 * To ensure that the clocksource does not wrap whilst we are idle,
-	 * limit the time the clocksource can be deferred by 12.5%. Please
-	 * note a margin of 12.5% is used because this can be computed with
-	 * a shift, versus say 10% which would require division.
-	 */
-	return max_nsecs - (max_nsecs >> 3);
+	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+						cs->maxadj, cs->mask,
+						&cs->max_cycles);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +646,7 @@
 }
 
 /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used update clocksource with new freq
  * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
@@ -656,48 +654,64 @@
  * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
  */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
+
 	/*
-	 * Calc the maximum number of seconds which we can run before
-	 * wrapping around. For clocksources which have a mask > 32bit
-	 * we need to limit the max sleep time to have a good
-	 * conversion precision. 10 minutes is still a reasonable
-	 * amount. That results in a shift value of 24 for a
-	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-	 * margin as we do in clocksource_max_deferment()
+	 * Default clocksources are *special* and self-define their mult/shift.
+	 * But, you're not special, so you should specify a freq value.
 	 */
-	sec = (cs->mask - (cs->mask >> 3));
-	do_div(sec, freq);
-	do_div(sec, scale);
-	if (!sec)
-		sec = 1;
-	else if (sec > 600 && cs->mask > UINT_MAX)
-		sec = 600;
+	if (freq) {
+		/*
+		 * Calc the maximum number of seconds which we can run before
+		 * wrapping around. For clocksources which have a mask > 32-bit
+		 * we need to limit the max sleep time to have a good
+		 * conversion precision. 10 minutes is still a reasonable
+		 * amount. That results in a shift value of 24 for a
+		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+		 * ~ 0.06ppm granularity for NTP.
+		 */
+		sec = cs->mask;
+		do_div(sec, freq);
+		do_div(sec, scale);
+		if (!sec)
+			sec = 1;
+		else if (sec > 600 && cs->mask > UINT_MAX)
+			sec = 600;
 
-	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-			       NSEC_PER_SEC / scale, sec * scale);
-
+		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				       NSEC_PER_SEC / scale, sec * scale);
+	}
 	/*
-	 * for clocksources that have large mults, to avoid overflow.
-	 * Since mult may be adjusted by ntp, add an safety extra margin
-	 *
+	 * Ensure clocksources that have large 'mult' values don't overflow
+	 * when adjusted.
 	 */
 	cs->maxadj = clocksource_max_adjustment(cs);
-	while ((cs->mult + cs->maxadj < cs->mult)
-		|| (cs->mult - cs->maxadj > cs->mult)) {
+	while (freq && ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult))) {
 		cs->mult >>= 1;
 		cs->shift--;
 		cs->maxadj = clocksource_max_adjustment(cs);
 	}
 
-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	/*
+	 * Only warn for *special* clocksources that self-define
+	 * their mult/shift values and don't specify a freq.
+	 */
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
+	clocksource_update_max_deferment(cs);
+
+	pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+			cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +728,7 @@
 {
 
 	/* Initialize mult/shift and max_idle_ns */
-	__clocksource_updatefreq_scale(cs, scale, freq);
+	__clocksource_update_freq_scale(cs, scale, freq);
 
 	/* Add clocksource to the clocksource list */
 	mutex_lock(&clocksource_mutex);
@@ -726,33 +740,6 @@
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs:		clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-	/* calculate max adjustment for given mult/shift */
-	cs->maxadj = clocksource_max_adjustment(cs);
-	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-		"Clocksource %s might overflow on 11%% adjustment\n",
-		cs->name);
-
-	/* calculate max idle time permitted for this clocksource */
-	cs->max_idle_ns = clocksource_max_deferment(cs);
-
-	mutex_lock(&clocksource_mutex);
-	clocksource_enqueue(cs);
-	clocksource_enqueue_watchdog(cs);
-	clocksource_select();
-	mutex_unlock(&clocksource_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
 static void __clocksource_change_rating(struct clocksource *cs, int rating)
 {
 	list_del(&cs->list);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bee0c1f..76d4bd9 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -54,7 +54,7 @@
 
 #include <trace/events/timer.h>
 
-#include "timekeeping.h"
+#include "tick-internal.h"
 
 /*
  * The timer bases:
@@ -1707,17 +1707,10 @@
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
-		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-	{
-		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
 		migrate_hrtimers(scpu);
 		break;
-	}
 #endif
 
 	default:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf5..347fecf 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include "tick-internal.h"
+#include "timekeeping.h"
 
 /* The Jiffies based clocksource is the lowest common
  * denominator clock source which should function on
@@ -71,6 +71,7 @@
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
+	.max_cycles	= 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@
 
 static int __init init_jiffies_clocksource(void)
 {
-	return clocksource_register(&clocksource_jiffies);
+	return __clocksource_register(&clocksource_jiffies);
 }
 
 core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@
 
 	refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
 
-	clocksource_register(&refined_jiffies);
+	__clocksource_register(&refined_jiffies);
 	return 0;
 }
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 0f60b08..7a68100 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 
-#include "tick-internal.h"
 #include "ntp_internal.h"
 
 /*
@@ -459,6 +458,16 @@
 	return leap;
 }
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+int __weak update_persistent_clock64(struct timespec64 now64)
+{
+	struct timespec now;
+
+	now = timespec64_to_timespec(now64);
+	return update_persistent_clock(now);
+}
+#endif
+
 #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
@@ -494,8 +503,9 @@
 		if (persistent_clock_is_local)
 			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock(timespec64_to_timespec(adjust));
+		fail = update_persistent_clock64(adjust);
 #endif
+
 #ifdef CONFIG_RTC_SYSTOHC
 		if (fail == -ENODEV)
 			fail = rtc_set_ntp_time(adjust);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15..a26036d 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-	ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:		sched_clock() value at last update
+ * @epoch_cyc:		Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ *			clocks.
+ * @read_sched_clock:	Current clock source (or dummy source when suspended).
+ * @mult:		Multipler for scaled math conversion.
+ * @shift:		Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
-	seqcount_t seq;
-	unsigned long rate;
+	u64 sched_clock_mask;
+	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
-	bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates. The lowest
+ *			bit is the index for @read_data.
+ * @read_data:		Data required to read from sched_clock.
+ * @wrap_kt:		Duration for which clock can run before wrapping.
+ * @rate:		Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+	seqcount_t		seq;
+	struct clock_read_data	read_data[2];
+	ktime_t			wrap_kt;
+	unsigned long		rate;
+
+	u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-	.mult	= NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
 	/*
@@ -49,7 +82,11 @@
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+			  .read_sched_clock = jiffy_sched_clock_read, },
+	.actual_read_sched_clock = jiffy_sched_clock_read,
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -58,111 +95,136 @@
 
 unsigned long long notrace sched_clock(void)
 {
-	u64 epoch_ns;
-	u64 epoch_cyc;
-	u64 cyc;
+	u64 cyc, res;
 	unsigned long seq;
-
-	if (cd.suspended)
-		return cd.epoch_ns;
+	struct clock_read_data *rd;
 
 	do {
-		seq = raw_read_seqcount_begin(&cd.seq);
-		epoch_cyc = cd.epoch_cyc;
-		epoch_ns = cd.epoch_ns;
+		seq = raw_read_seqcount(&cd.seq);
+		rd = cd.read_data + (seq & 1);
+
+		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+		      rd->sched_clock_mask;
+		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
 	} while (read_seqcount_retry(&cd.seq, seq));
 
-	cyc = read_sched_clock();
-	cyc = (cyc - epoch_cyc) & sched_clock_mask;
-	return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+	return res;
 }
 
 /*
- * Atomically update the sched_clock epoch.
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
  */
-static void notrace update_sched_clock(void)
+static void update_clock_read_data(struct clock_read_data *rd)
 {
-	unsigned long flags;
+	/* update the backup (odd) copy with the new data */
+	cd.read_data[1] = *rd;
+
+	/* steer readers towards the odd copy */
+	raw_write_seqcount_latch(&cd.seq);
+
+	/* now its safe for us to update the normal (even) copy */
+	cd.read_data[0] = *rd;
+
+	/* switch readers back to the even copy */
+	raw_write_seqcount_latch(&cd.seq);
+}
+
+/*
+ * Atomically update the sched_clock() epoch.
+ */
+static void update_sched_clock(void)
+{
 	u64 cyc;
 	u64 ns;
+	struct clock_read_data rd;
 
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	rd = cd.read_data[0];
 
-	raw_local_irq_save(flags);
-	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
-	raw_write_seqcount_end(&cd.seq);
-	raw_local_irq_restore(flags);
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+	rd.epoch_ns = ns;
+	rd.epoch_cyc = cyc;
+
+	update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
 	update_sched_clock();
 	hrtimer_forward_now(hrt, cd.wrap_kt);
+
 	return HRTIMER_RESTART;
 }
 
-void __init sched_clock_register(u64 (*read)(void), int bits,
-				 unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 {
 	u64 res, wrap, new_mask, new_epoch, cyc, ns;
 	u32 new_mult, new_shift;
-	ktime_t new_wrap_kt;
 	unsigned long r;
 	char r_unit;
+	struct clock_read_data rd;
 
 	if (cd.rate > rate)
 		return;
 
 	WARN_ON(!irqs_disabled());
 
-	/* calculate the mult/shift to convert counter ticks to ns. */
+	/* Calculate the mult/shift to convert counter ticks to ns. */
 	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
 	new_mask = CLOCKSOURCE_MASK(bits);
-
-	/* calculate how many ns until we wrap */
-	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
-
-	/* update epoch for new counter and update epoch_ns from old counter*/
-	new_epoch = read();
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
-
-	raw_write_seqcount_begin(&cd.seq);
-	read_sched_clock = read;
-	sched_clock_mask = new_mask;
 	cd.rate = rate;
-	cd.wrap_kt = new_wrap_kt;
-	cd.mult = new_mult;
-	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
-	raw_write_seqcount_end(&cd.seq);
+
+	/* Calculate how many nanosecs until we risk wrapping */
+	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+	cd.wrap_kt = ns_to_ktime(wrap);
+
+	rd = cd.read_data[0];
+
+	/* Update epoch for new counter and update 'epoch_ns' from old counter*/
+	new_epoch = read();
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+	cd.actual_read_sched_clock = read;
+
+	rd.read_sched_clock	= read;
+	rd.sched_clock_mask	= new_mask;
+	rd.mult			= new_mult;
+	rd.shift		= new_shift;
+	rd.epoch_cyc		= new_epoch;
+	rd.epoch_ns		= ns;
+
+	update_clock_read_data(&rd);
 
 	r = rate;
 	if (r >= 4000000) {
 		r /= 1000000;
 		r_unit = 'M';
-	} else if (r >= 1000) {
-		r /= 1000;
-		r_unit = 'k';
-	} else
-		r_unit = ' ';
+	} else {
+		if (r >= 1000) {
+			r /= 1000;
+			r_unit = 'k';
+		} else {
+			r_unit = ' ';
+		}
+	}
 
-	/* calculate the ns resolution of this counter */
+	/* Calculate the ns resolution of this counter */
 	res = cyc_to_ns(1ULL, new_mult, new_shift);
 
 	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
 		bits, r, r_unit, res, wrap);
 
-	/* Enable IRQ time accounting if we have a fast enough sched_clock */
+	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
 	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
 		enable_sched_clock_irqtime();
 
@@ -172,10 +234,10 @@
 void __init sched_clock_postinit(void)
 {
 	/*
-	 * If no sched_clock function has been provided at that point,
+	 * If no sched_clock() function has been provided at that point,
 	 * make it the final one one.
 	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
+	if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
 		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
@@ -189,29 +251,53 @@
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+	unsigned long seq = raw_read_seqcount(&cd.seq);
+
+	return cd.read_data[seq & 1].epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
+	struct clock_read_data *rd = &cd.read_data[0];
+
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	cd.suspended = true;
+	rd->read_sched_clock = suspended_sched_clock_read;
+
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	cd.epoch_cyc = read_sched_clock();
+	struct clock_read_data *rd = &cd.read_data[0];
+
+	rd->epoch_cyc = cd.actual_read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-	cd.suspended = false;
+	rd->read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
-	.suspend = sched_clock_suspend,
-	.resume = sched_clock_resume,
+	.suspend	= sched_clock_suspend,
+	.resume		= sched_clock_resume,
 };
 
 static int __init sched_clock_syscore_init(void)
 {
 	register_syscore_ops(&sched_clock_ops);
+
 	return 0;
 }
 device_initcall(sched_clock_syscore_init);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 066f0ec..7e8ca4f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -33,12 +33,14 @@
 static cpumask_var_t tick_broadcast_on;
 static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
-static int tick_broadcast_force;
+static int tick_broadcast_forced;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
+static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
 
 /*
@@ -303,7 +305,7 @@
 	/*
 	 * The device is in periodic mode. No reprogramming necessary:
 	 */
-	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+	if (dev->state == CLOCK_EVT_STATE_PERIODIC)
 		goto unlock;
 
 	/*
@@ -324,49 +326,54 @@
 	raw_spin_unlock(&tick_broadcast_lock);
 }
 
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_control - Enable/disable or force broadcast mode
+ * @mode:	The selected broadcast mode
+ *
+ * Called when the system enters a state where affected tick devices
+ * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-static void tick_do_broadcast_on_off(unsigned long *reason)
+void tick_broadcast_control(enum tick_broadcast_mode mode)
 {
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
-	unsigned long flags;
 	int cpu, bc_stopped;
 
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
-	cpu = smp_processor_id();
-	td = &per_cpu(tick_cpu_device, cpu);
+	td = this_cpu_ptr(&tick_cpu_device);
 	dev = td->evtdev;
-	bc = tick_broadcast_device.evtdev;
 
 	/*
 	 * Is the device not affected by the powerstate ?
 	 */
 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
-		goto out;
+		return;
 
 	if (!tick_device_is_functional(dev))
-		goto out;
+		return;
 
+	raw_spin_lock(&tick_broadcast_lock);
+	cpu = smp_processor_id();
+	bc = tick_broadcast_device.evtdev;
 	bc_stopped = cpumask_empty(tick_broadcast_mask);
 
-	switch (*reason) {
-	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+	switch (mode) {
+	case TICK_BROADCAST_FORCE:
+		tick_broadcast_forced = 1;
+	case TICK_BROADCAST_ON:
 		cpumask_set_cpu(cpu, tick_broadcast_on);
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
 		}
-		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-			tick_broadcast_force = 1;
 		break;
-	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-		if (tick_broadcast_force)
+
+	case TICK_BROADCAST_OFF:
+		if (tick_broadcast_forced)
 			break;
 		cpumask_clear_cpu(cpu, tick_broadcast_on);
 		if (!tick_device_is_functional(dev))
@@ -388,22 +395,9 @@
 		else
 			tick_broadcast_setup_oneshot(bc);
 	}
-out:
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+	raw_spin_unlock(&tick_broadcast_lock);
 }
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop.
- */
-void tick_broadcast_on_off(unsigned long reason, int *oncpu)
-{
-	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
-		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
-		       "offline CPU #%d\n", *oncpu);
-	else
-		tick_do_broadcast_on_off(&reason);
-}
+EXPORT_SYMBOL_GPL(tick_broadcast_control);
 
 /*
  * Set the periodic handler depending on broadcast on/off
@@ -416,14 +410,14 @@
 		dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -438,6 +432,7 @@
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -453,38 +448,48 @@
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
-int tick_resume_broadcast(void)
+/*
+ * This is called from tick_resume_local() on a resuming CPU. That's
+ * called from the core resume function, tick_unfreeze() and the magic XEN
+ * resume hackery.
+ *
+ * In none of these cases the broadcast device mode can change and the
+ * bit of the resuming CPU in the broadcast mask is safe as well.
+ */
+bool tick_resume_check_broadcast(void)
+{
+	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
+		return false;
+	else
+		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
+}
+
+void tick_resume_broadcast(void)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	int broadcast = 0;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
 
 	if (bc) {
-		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+		clockevents_tick_resume(bc);
 
 		switch (tick_broadcast_device.mode) {
 		case TICKDEV_MODE_PERIODIC:
 			if (!cpumask_empty(tick_broadcast_mask))
 				tick_broadcast_start_periodic(bc);
-			broadcast = cpumask_test_cpu(smp_processor_id(),
-						     tick_broadcast_mask);
 			break;
 		case TICKDEV_MODE_ONESHOT:
 			if (!cpumask_empty(tick_broadcast_mask))
-				broadcast = tick_resume_broadcast_oneshot(bc);
+				tick_resume_broadcast_oneshot(bc);
 			break;
 		}
 	}
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-
-	return broadcast;
 }
 
-
 #ifdef CONFIG_TICK_ONESHOT
 
 static cpumask_var_t tick_broadcast_oneshot_mask;
@@ -532,8 +537,8 @@
 {
 	int ret;
 
-	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
-		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+	if (bc->state != CLOCK_EVT_STATE_ONESHOT)
+		clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 
 	ret = clockevents_program_event(bc, expires, force);
 	if (!ret)
@@ -541,10 +546,9 @@
 	return ret;
 }
 
-int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
-	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-	return 0;
+	clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 }
 
 /*
@@ -562,8 +566,8 @@
 		 * switched over, leave the device alone.
 		 */
 		if (td->mode == TICKDEV_MODE_ONESHOT) {
-			clockevents_set_mode(td->evtdev,
-					     CLOCK_EVT_MODE_ONESHOT);
+			clockevents_set_state(td->evtdev,
+					      CLOCK_EVT_STATE_ONESHOT);
 		}
 	}
 }
@@ -666,31 +670,26 @@
 		if (dev->next_event.tv64 < bc->next_event.tv64)
 			return;
 	}
-	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-static void broadcast_move_bc(int deadcpu)
-{
-	struct clock_event_device *bc = tick_broadcast_device.evtdev;
-
-	if (!bc || !broadcast_needs_cpu(bc, deadcpu))
-		return;
-	/* This moves the broadcast assignment to this cpu */
-	clockevents_program_event(bc, bc->next_event, 1);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
+ * @state:	The target state (enter/exit)
+ *
+ * The system enters/leaves a state, where affected devices might stop
  * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-int tick_broadcast_oneshot_control(unsigned long reason)
+int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
-	unsigned long flags;
-	ktime_t now;
 	int cpu, ret = 0;
+	ktime_t now;
 
 	/*
 	 * Periodic mode does not care about the enter/exit of power
@@ -703,17 +702,17 @@
 	 * We are called with preemtion disabled from the depth of the
 	 * idle code, so we can't be moved away.
 	 */
-	cpu = smp_processor_id();
-	td = &per_cpu(tick_cpu_device, cpu);
+	td = this_cpu_ptr(&tick_cpu_device);
 	dev = td->evtdev;
 
 	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
 		return 0;
 
+	raw_spin_lock(&tick_broadcast_lock);
 	bc = tick_broadcast_device.evtdev;
+	cpu = smp_processor_id();
 
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
+	if (state == TICK_BROADCAST_ENTER) {
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 			broadcast_shutdown_local(bc, dev);
@@ -741,7 +740,7 @@
 			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 	} else {
 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
-			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+			clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 			/*
 			 * The cpu which was handling the broadcast
 			 * timer marked this cpu in the broadcast
@@ -805,9 +804,10 @@
 		}
 	}
 out:
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+	raw_spin_unlock(&tick_broadcast_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
 
 /*
  * Reset the one shot broadcast for a cpu
@@ -842,7 +842,7 @@
 
 	/* Set it up only once ! */
 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
-		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
+		int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
 
 		bc->event_handler = tick_handle_oneshot_broadcast;
 
@@ -858,7 +858,7 @@
 			   tick_broadcast_oneshot_mask, tmpmask);
 
 		if (was_periodic && !cpumask_empty(tmpmask)) {
-			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+			clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 			tick_broadcast_init_next_event(tmpmask,
 						       tick_next_period);
 			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
@@ -894,14 +894,28 @@
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+	bc = tick_broadcast_device.evtdev;
+
+	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+		/* This moves the broadcast assignment to this CPU: */
+		clockevents_program_event(bc, bc->next_event, 1);
+	}
+	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
 
 /*
  * Remove a dead CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 {
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -913,10 +927,9 @@
 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
 
-	broadcast_move_bc(cpu);
-
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 /*
  * Check, whether the broadcast device is in one shot mode
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index f7c5155..3ae6afa 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -102,7 +102,7 @@
 
 	tick_periodic(cpu);
 
-	if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
+	if (dev->state != CLOCK_EVT_STATE_ONESHOT)
 		return;
 	for (;;) {
 		/*
@@ -140,7 +140,7 @@
 
 	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
 	    !tick_broadcast_oneshot_active()) {
-		clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
+		clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
 	} else {
 		unsigned long seq;
 		ktime_t next;
@@ -150,7 +150,7 @@
 			next = tick_next_period;
 		} while (read_seqretry(&jiffies_lock, seq));
 
-		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+		clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 
 		for (;;) {
 			if (!clockevents_program_event(dev, next, false))
@@ -332,14 +332,16 @@
 	tick_install_broadcast_device(newdev);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Transfer the do_timer job away from a dying cpu.
  *
- * Called with interrupts disabled.
+ * Called with interrupts disabled. Not locking required. If
+ * tick_do_timer_cpu is owned by this cpu, nothing can change it.
  */
-void tick_handover_do_timer(int *cpup)
+void tick_handover_do_timer(void)
 {
-	if (*cpup == tick_do_timer_cpu) {
+	if (tick_do_timer_cpu == smp_processor_id()) {
 		int cpu = cpumask_first(cpu_online_mask);
 
 		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
@@ -354,9 +356,9 @@
  * access the hardware device itself.
  * We just set the mode and remove it from the lists.
  */
-void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int cpu)
 {
-	struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
+	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 	struct clock_event_device *dev = td->evtdev;
 
 	td->mode = TICKDEV_MODE_PERIODIC;
@@ -365,27 +367,42 @@
 		 * Prevent that the clock events layer tries to call
 		 * the set mode function!
 		 */
+		dev->state = CLOCK_EVT_STATE_DETACHED;
 		dev->mode = CLOCK_EVT_MODE_UNUSED;
 		clockevents_exchange_device(dev, NULL);
 		dev->event_handler = clockevents_handle_noop;
 		td->evtdev = NULL;
 	}
 }
+#endif
 
-void tick_suspend(void)
+/**
+ * tick_suspend_local - Suspend the local tick device
+ *
+ * Called from the local cpu for freeze with interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend_local(void)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 
 	clockevents_shutdown(td->evtdev);
 }
 
-void tick_resume(void)
+/**
+ * tick_resume_local - Resume the local tick device
+ *
+ * Called from the local CPU for unfreeze or XEN resume magic.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume_local(void)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
-	int broadcast = tick_resume_broadcast();
+	bool broadcast = tick_resume_check_broadcast();
 
-	clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
-
+	clockevents_tick_resume(td->evtdev);
 	if (!broadcast) {
 		if (td->mode == TICKDEV_MODE_PERIODIC)
 			tick_setup_periodic(td->evtdev, 0);
@@ -394,6 +411,35 @@
 	}
 }
 
+/**
+ * tick_suspend - Suspend the tick and the broadcast device
+ *
+ * Called from syscore_suspend() via timekeeping_suspend with only one
+ * CPU online and interrupts disabled or from tick_unfreeze() under
+ * tick_freeze_lock.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend(void)
+{
+	tick_suspend_local();
+	tick_suspend_broadcast();
+}
+
+/**
+ * tick_resume - Resume the tick and the broadcast device
+ *
+ * Called from syscore_resume() via timekeeping_resume with only one
+ * CPU online and interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume(void)
+{
+	tick_resume_broadcast();
+	tick_resume_local();
+}
+
 static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
 static unsigned int tick_freeze_depth;
 
@@ -411,12 +457,10 @@
 	raw_spin_lock(&tick_freeze_lock);
 
 	tick_freeze_depth++;
-	if (tick_freeze_depth == num_online_cpus()) {
+	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_suspend();
-	} else {
-		tick_suspend();
-		tick_suspend_broadcast();
-	}
+	else
+		tick_suspend_local();
 
 	raw_spin_unlock(&tick_freeze_lock);
 }
@@ -437,7 +481,7 @@
 	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_resume();
 	else
-		tick_resume();
+		tick_resume_local();
 
 	tick_freeze_depth--;
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 366aeb4..b64fdd8 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -5,15 +5,12 @@
 #include <linux/tick.h>
 
 #include "timekeeping.h"
+#include "tick-sched.h"
 
-extern seqlock_t jiffies_lock;
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#define CS_NAME_LEN	32
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
-
-#define TICK_DO_TIMER_NONE	-1
-#define TICK_DO_TIMER_BOOT	-2
+# define TICK_DO_TIMER_NONE	-1
+# define TICK_DO_TIMER_BOOT	-2
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 extern ktime_t tick_next_period;
@@ -23,21 +20,72 @@
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 extern void tick_check_new_device(struct clock_event_device *dev);
-extern void tick_handover_do_timer(int *cpup);
-extern void tick_shutdown(unsigned int *cpup);
+extern void tick_shutdown(unsigned int cpu);
 extern void tick_suspend(void);
 extern void tick_resume(void);
 extern bool tick_check_replacement(struct clock_event_device *curdev,
 				   struct clock_event_device *newdev);
 extern void tick_install_replacement(struct clock_event_device *dev);
+extern int tick_is_oneshot_available(void);
+extern struct tick_device *tick_get_device(int cpu);
+
+extern int clockevents_tick_resume(struct clock_event_device *dev);
+/* Check, if the device is functional or a dummy for broadcast */
+static inline int tick_device_is_functional(struct clock_event_device *dev)
+{
+	return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
+}
 
 extern void clockevents_shutdown(struct clock_event_device *dev);
-
+extern void clockevents_exchange_device(struct clock_event_device *old,
+					struct clock_event_device *new);
+extern void clockevents_set_state(struct clock_event_device *dev,
+				 enum clock_event_state state);
+extern int clockevents_program_event(struct clock_event_device *dev,
+				     ktime_t expires, bool force);
+extern void clockevents_handle_noop(struct clock_event_device *dev);
+extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
 extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 
-/*
- * NO_HZ / high resolution timer shared code
- */
+/* Broadcasting support */
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
+extern void tick_install_broadcast_device(struct clock_event_device *dev);
+extern int tick_is_broadcast_device(struct clock_event_device *dev);
+extern void tick_shutdown_broadcast(unsigned int cpu);
+extern void tick_suspend_broadcast(void);
+extern void tick_resume_broadcast(void);
+extern bool tick_resume_check_broadcast(void);
+extern void tick_broadcast_init(void);
+extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
+extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
+extern struct tick_device *tick_get_broadcast_device(void);
+extern struct cpumask *tick_get_broadcast_mask(void);
+# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
+static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
+static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
+static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
+static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
+static inline void tick_shutdown_broadcast(unsigned int cpu) { }
+static inline void tick_suspend_broadcast(void) { }
+static inline void tick_resume_broadcast(void) { }
+static inline bool tick_resume_check_broadcast(void) { return false; }
+static inline void tick_broadcast_init(void) { }
+static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; }
+
+/* Set the periodic handler in non broadcast mode */
+static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
+{
+	dev->event_handler = tick_handle_periodic;
+}
+# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
+
+#else /* !GENERIC_CLOCKEVENTS: */
+static inline void tick_suspend(void) { }
+static inline void tick_resume(void) { }
+#endif /* !GENERIC_CLOCKEVENTS */
+
+/* Oneshot related functions */
 #ifdef CONFIG_TICK_ONESHOT
 extern void tick_setup_oneshot(struct clock_event_device *newdev,
 			       void (*handler)(struct clock_event_device *),
@@ -46,58 +94,42 @@
 extern void tick_oneshot_notify(void);
 extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
 extern void tick_resume_oneshot(void);
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
-extern int tick_broadcast_oneshot_control(unsigned long reason);
-extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
-extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
-extern int tick_broadcast_oneshot_active(void);
-extern void tick_check_oneshot_broadcast_this_cpu(void);
-bool tick_broadcast_oneshot_available(void);
-# else /* BROADCAST */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-	BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
-static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
-static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
-static inline bool tick_broadcast_oneshot_available(void) { return true; }
-# endif /* !BROADCAST */
-
-#else /* !ONESHOT */
+static inline bool tick_oneshot_possible(void) { return true; }
+extern int tick_oneshot_mode_active(void);
+extern void tick_clock_notify(void);
+extern int tick_check_oneshot_change(int allow_nohz);
+extern int tick_init_highres(void);
+#else /* !CONFIG_TICK_ONESHOT: */
 static inline
 void tick_setup_oneshot(struct clock_event_device *newdev,
 			void (*handler)(struct clock_event_device *),
-			ktime_t nextevt)
-{
-	BUG();
-}
-static inline void tick_resume_oneshot(void)
-{
-	BUG();
-}
-static inline int tick_program_event(ktime_t expires, int force)
-{
-	return 0;
-}
+			ktime_t nextevt) { BUG(); }
+static inline void tick_resume_oneshot(void) { BUG(); }
+static inline int tick_program_event(ktime_t expires, int force) { return 0; }
 static inline void tick_oneshot_notify(void) { }
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-	BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
-static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
-{
-	return 0;
-}
+static inline bool tick_oneshot_possible(void) { return false; }
+static inline int tick_oneshot_mode_active(void) { return 0; }
+static inline void tick_clock_notify(void) { }
+static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+#endif /* !CONFIG_TICK_ONESHOT */
+
+/* Functions related to oneshot broadcasting */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
+extern void tick_broadcast_switch_to_oneshot(void);
+extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
+extern int tick_broadcast_oneshot_active(void);
+extern void tick_check_oneshot_broadcast_this_cpu(void);
+bool tick_broadcast_oneshot_available(void);
+extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
+#else /* !(BROADCAST && ONESHOT): */
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
+static inline void tick_broadcast_switch_to_oneshot(void) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline bool tick_broadcast_oneshot_available(void) { return false; }
-#endif /* !TICK_ONESHOT */
+static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
+static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
+#endif /* !(BROADCAST && ONESHOT) */
 
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
@@ -105,68 +137,3 @@
 # else
 static inline void tick_nohz_init(void) { }
 #endif
-
-/*
- * Broadcasting support
- */
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern void tick_install_broadcast_device(struct clock_event_device *dev);
-extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
-extern void tick_shutdown_broadcast(unsigned int *cpup);
-extern void tick_suspend_broadcast(void);
-extern int tick_resume_broadcast(void);
-extern void tick_broadcast_init(void);
-extern void
-tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
-int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
-
-#else /* !BROADCAST */
-
-static inline void tick_install_broadcast_device(struct clock_event_device *dev)
-{
-}
-
-static inline int tick_is_broadcast_device(struct clock_event_device *dev)
-{
-	return 0;
-}
-static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
-					     int cpu)
-{
-	return 0;
-}
-static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
-static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
-static inline void tick_suspend_broadcast(void) { }
-static inline int tick_resume_broadcast(void) { return 0; }
-static inline void tick_broadcast_init(void) { }
-static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
-					     u32 freq) { return -ENODEV; }
-
-/*
- * Set the periodic handler in non broadcast mode
- */
-static inline void tick_set_periodic_handler(struct clock_event_device *dev,
-					     int broadcast)
-{
-	dev->event_handler = tick_handle_periodic;
-}
-#endif /* !BROADCAST */
-
-/*
- * Check, if the device is functional or a dummy for broadcast
- */
-static inline int tick_device_is_functional(struct clock_event_device *dev)
-{
-	return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
-}
-
-int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
-
-#endif
-
-extern void do_timer(unsigned long ticks);
-extern void update_wall_time(void);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 7ce740e..67a64b1 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -38,7 +38,7 @@
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
-	clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+	clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 	clockevents_program_event(dev, ktime_get(), true);
 }
 
@@ -50,7 +50,7 @@
 			ktime_t next_event)
 {
 	newdev->event_handler = handler;
-	clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+	clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
 	clockevents_program_event(newdev, next_event, true);
 }
 
@@ -81,7 +81,7 @@
 
 	td->mode = TICKDEV_MODE_ONESHOT;
 	dev->event_handler = handler;
-	clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+	clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 	tick_broadcast_switch_to_oneshot();
 	return 0;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a4c4eda..9142591 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -34,7 +34,7 @@
 /*
  * Per cpu nohz control structure
  */
-DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -416,6 +416,11 @@
 
 __setup("nohz=", setup_tick_nohz);
 
+int tick_nohz_tick_stopped(void)
+{
+	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 /**
  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
  *
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
new file mode 100644
index 0000000..28b5da3
--- /dev/null
+++ b/kernel/time/tick-sched.h
@@ -0,0 +1,74 @@
+#ifndef _TICK_SCHED_H
+#define _TICK_SCHED_H
+
+#include <linux/hrtimer.h>
+
+enum tick_device_mode {
+	TICKDEV_MODE_PERIODIC,
+	TICKDEV_MODE_ONESHOT,
+};
+
+struct tick_device {
+	struct clock_event_device *evtdev;
+	enum tick_device_mode mode;
+};
+
+enum tick_nohz_mode {
+	NOHZ_MODE_INACTIVE,
+	NOHZ_MODE_LOWRES,
+	NOHZ_MODE_HIGHRES,
+};
+
+/**
+ * struct tick_sched - sched tick emulation and no idle tick control/stats
+ * @sched_timer:	hrtimer to schedule the periodic tick in high
+ *			resolution mode
+ * @last_tick:		Store the last tick expiry time when the tick
+ *			timer is modified for nohz sleeps. This is necessary
+ *			to resume the tick timer operation in the timeline
+ *			when the CPU returns from nohz sleep.
+ * @tick_stopped:	Indicator that the idle tick has been stopped
+ * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
+ * @idle_calls:		Total number of idle calls
+ * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
+ * @idle_entrytime:	Time when the idle call was entered
+ * @idle_waketime:	Time when the idle was interrupted
+ * @idle_exittime:	Time when the idle state was left
+ * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
+ * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
+ * @sleep_length:	Duration of the current idle sleep
+ * @do_timer_lst:	CPU was the last one doing do_timer before going idle
+ */
+struct tick_sched {
+	struct hrtimer			sched_timer;
+	unsigned long			check_clocks;
+	enum tick_nohz_mode		nohz_mode;
+	ktime_t				last_tick;
+	int				inidle;
+	int				tick_stopped;
+	unsigned long			idle_jiffies;
+	unsigned long			idle_calls;
+	unsigned long			idle_sleeps;
+	int				idle_active;
+	ktime_t				idle_entrytime;
+	ktime_t				idle_waketime;
+	ktime_t				idle_exittime;
+	ktime_t				idle_sleeptime;
+	ktime_t				iowait_sleeptime;
+	ktime_t				sleep_length;
+	unsigned long			last_jiffies;
+	unsigned long			next_jiffies;
+	ktime_t				idle_expires;
+	int				do_timer_last;
+};
+
+extern struct tick_sched *tick_get_tick_sched(int cpu);
+
+extern void tick_setup_sched_timer(void);
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
+extern void tick_cancel_sched_timer(int cpu);
+#else
+static inline void tick_cancel_sched_timer(int cpu) { }
+#endif
+
+#endif
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db941..946acb7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,17 +59,15 @@
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned;
+static struct tk_fast tk_fast_raw  ____cacheline_aligned;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-/* Flag for if there is a persistent clock on this platform */
-bool __read_mostly persistent_clock_exist = false;
-
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
-	while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
-		tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
+	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
+		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
 		tk->xtime_sec++;
 	}
 }
@@ -79,20 +77,20 @@
 	struct timespec64 ts;
 
 	ts.tv_sec = tk->xtime_sec;
-	ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 	return ts;
 }
 
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
 	tk->xtime_sec = ts->tv_sec;
-	tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
+	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
 }
 
 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
 {
 	tk->xtime_sec += ts->tv_sec;
-	tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
+	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
 	tk_normalize_xtime(tk);
 }
 
@@ -118,6 +116,117 @@
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+/*
+ * These simple flag variables are managed
+ * without locks, which is racy, but ok since
+ * we don't really care about being super
+ * precise about how many events were seen,
+ * just that a problem was observed.
+ */
+static int timekeeping_underflow_seen;
+static int timekeeping_overflow_seen;
+
+/* last_warning is only modified under the timekeeping lock */
+static long timekeeping_last_warning;
+
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+	cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+	const char *name = tk->tkr_mono.clock->name;
+
+	if (offset > max_cycles) {
+		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+				offset, name, max_cycles);
+		printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+	} else {
+		if (offset > (max_cycles >> 1)) {
+			printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+					offset, name, max_cycles >> 1);
+			printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+		}
+	}
+
+	if (timekeeping_underflow_seen) {
+		if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+			printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+			printk_deferred("         Your kernel is probably still fine.\n");
+			timekeeping_last_warning = jiffies;
+		}
+		timekeeping_underflow_seen = 0;
+	}
+
+	if (timekeeping_overflow_seen) {
+		if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+			printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+			printk_deferred("         Your kernel is probably still fine.\n");
+			timekeeping_last_warning = jiffies;
+		}
+		timekeeping_overflow_seen = 0;
+	}
+}
+
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+	cycle_t now, last, mask, max, delta;
+	unsigned int seq;
+
+	/*
+	 * Since we're called holding a seqlock, the data may shift
+	 * under us while we're doing the calculation. This can cause
+	 * false positives, since we'd note a problem but throw the
+	 * results away. So nest another seqlock here to atomically
+	 * grab the points we are checking with.
+	 */
+	do {
+		seq = read_seqcount_begin(&tk_core.seq);
+		now = tkr->read(tkr->clock);
+		last = tkr->cycle_last;
+		mask = tkr->mask;
+		max = tkr->clock->max_cycles;
+	} while (read_seqcount_retry(&tk_core.seq, seq));
+
+	delta = clocksource_delta(now, last, mask);
+
+	/*
+	 * Try to catch underflows by checking if we are seeing small
+	 * mask-relative negative values.
+	 */
+	if (unlikely((~delta & mask) < (mask >> 3))) {
+		timekeeping_underflow_seen = 1;
+		delta = 0;
+	}
+
+	/* Cap delta value to the max_cycles values to avoid mult overflows */
+	if (unlikely(delta > max)) {
+		timekeeping_overflow_seen = 1;
+		delta = tkr->clock->max_cycles;
+	}
+
+	return delta;
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+	cycle_t cycle_now, delta;
+
+	/* read clocksource */
+	cycle_now = tkr->read(tkr->clock);
+
+	/* calculate the delta since the last update_wall_time */
+	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+
+	return delta;
+}
+#endif
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -135,11 +244,16 @@
 	u64 tmp, ntpinterval;
 	struct clocksource *old_clock;
 
-	old_clock = tk->tkr.clock;
-	tk->tkr.clock = clock;
-	tk->tkr.read = clock->read;
-	tk->tkr.mask = clock->mask;
-	tk->tkr.cycle_last = tk->tkr.read(clock);
+	old_clock = tk->tkr_mono.clock;
+	tk->tkr_mono.clock = clock;
+	tk->tkr_mono.read = clock->read;
+	tk->tkr_mono.mask = clock->mask;
+	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+
+	tk->tkr_raw.clock = clock;
+	tk->tkr_raw.read = clock->read;
+	tk->tkr_raw.mask = clock->mask;
+	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
 	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +277,14 @@
 	if (old_clock) {
 		int shift_change = clock->shift - old_clock->shift;
 		if (shift_change < 0)
-			tk->tkr.xtime_nsec >>= -shift_change;
+			tk->tkr_mono.xtime_nsec >>= -shift_change;
 		else
-			tk->tkr.xtime_nsec <<= shift_change;
+			tk->tkr_mono.xtime_nsec <<= shift_change;
 	}
-	tk->tkr.shift = clock->shift;
+	tk->tkr_raw.xtime_nsec = 0;
+
+	tk->tkr_mono.shift = clock->shift;
+	tk->tkr_raw.shift = clock->shift;
 
 	tk->ntp_error = 0;
 	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +295,8 @@
 	 * active clocksource. These value will be adjusted via NTP
 	 * to counteract clock drifting.
 	 */
-	tk->tkr.mult = clock->mult;
+	tk->tkr_mono.mult = clock->mult;
+	tk->tkr_raw.mult = clock->mult;
 	tk->ntp_err_mult = 0;
 }
 
@@ -193,14 +311,10 @@
 
 static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-	cycle_t cycle_now, delta;
+	cycle_t delta;
 	s64 nsec;
 
-	/* read clocksource: */
-	cycle_now = tkr->read(tkr->clock);
-
-	/* calculate the delta since the last update_wall_time: */
-	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+	delta = timekeeping_get_delta(tkr);
 
 	nsec = delta * tkr->mult + tkr->xtime_nsec;
 	nsec >>= tkr->shift;
@@ -209,25 +323,6 @@
 	return nsec + arch_gettimeoffset();
 }
 
-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
-{
-	struct clocksource *clock = tk->tkr.clock;
-	cycle_t cycle_now, delta;
-	s64 nsec;
-
-	/* read clocksource: */
-	cycle_now = tk->tkr.read(clock);
-
-	/* calculate the delta since the last update_wall_time: */
-	delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-
-	/* convert delta to nanoseconds. */
-	nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
-
-	/* If arch requires, add in get_arch_timeoffset() */
-	return nsec + arch_gettimeoffset();
-}
-
 /**
  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
  * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +362,18 @@
  * slightly wrong timestamp (a few nanoseconds). See
  * @ktime_get_mono_fast_ns.
  */
-static void update_fast_timekeeper(struct tk_read_base *tkr)
+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
 {
-	struct tk_read_base *base = tk_fast_mono.base;
+	struct tk_read_base *base = tkf->base;
 
 	/* Force readers off to base[1] */
-	raw_write_seqcount_latch(&tk_fast_mono.seq);
+	raw_write_seqcount_latch(&tkf->seq);
 
 	/* Update base[0] */
 	memcpy(base, tkr, sizeof(*base));
 
 	/* Force readers back to base[0] */
-	raw_write_seqcount_latch(&tk_fast_mono.seq);
+	raw_write_seqcount_latch(&tkf->seq);
 
 	/* Update base[1] */
 	memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +411,33 @@
  * of the following timestamps. Callers need to be aware of that and
  * deal with it.
  */
-u64 notrace ktime_get_mono_fast_ns(void)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
 	struct tk_read_base *tkr;
 	unsigned int seq;
 	u64 now;
 
 	do {
-		seq = raw_read_seqcount(&tk_fast_mono.seq);
-		tkr = tk_fast_mono.base + (seq & 0x01);
-		now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+		seq = raw_read_seqcount(&tkf->seq);
+		tkr = tkf->base + (seq & 0x01);
+		now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+	} while (read_seqcount_retry(&tkf->seq, seq));
 
-	} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
 	return now;
 }
+
+u64 ktime_get_mono_fast_ns(void)
+{
+	return __ktime_get_fast_ns(&tk_fast_mono);
+}
 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
 
+u64 ktime_get_raw_fast_ns(void)
+{
+	return __ktime_get_fast_ns(&tk_fast_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
@@ -353,12 +459,17 @@
 static void halt_fast_timekeeper(struct timekeeper *tk)
 {
 	static struct tk_read_base tkr_dummy;
-	struct tk_read_base *tkr = &tk->tkr;
+	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 	cycles_at_suspend = tkr->read(tkr->clock);
 	tkr_dummy.read = dummy_clock_read;
-	update_fast_timekeeper(&tkr_dummy);
+	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+
+	tkr = &tk->tkr_raw;
+	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+	tkr_dummy.read = dummy_clock_read;
+	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +480,8 @@
 
 	xt = timespec64_to_timespec(tk_xtime(tk));
 	wm = timespec64_to_timespec(tk->wall_to_monotonic);
-	update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
-			    tk->tkr.cycle_last);
+	update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
+			    tk->tkr_mono.cycle_last);
 }
 
 static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +498,11 @@
 	* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
 	* users are removed, this can be killed.
 	*/
-	remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
-	tk->tkr.xtime_nsec -= remainder;
-	tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
+	remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
+	tk->tkr_mono.xtime_nsec -= remainder;
+	tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
 	tk->ntp_error += remainder << tk->ntp_error_shift;
-	tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
+	tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
 }
 #else
 #define old_vsyscall_fixup(tk)
@@ -456,17 +567,17 @@
 	 */
 	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
 	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
-	tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 
 	/* Update the monotonic raw base */
-	tk->base_raw = timespec64_to_ktime(tk->raw_time);
+	tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
 
 	/*
 	 * The sum of the nanoseconds portions of xtime and
 	 * wall_to_monotonic can be greater/equal one second. Take
 	 * this into account before updating tk->ktime_sec.
 	 */
-	nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
 	if (nsec >= NSEC_PER_SEC)
 		seconds++;
 	tk->ktime_sec = seconds;
@@ -489,7 +600,8 @@
 		memcpy(&shadow_timekeeper, &tk_core.timekeeper,
 		       sizeof(tk_core.timekeeper));
 
-	update_fast_timekeeper(&tk->tkr);
+	update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+	update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
 }
 
 /**
@@ -501,22 +613,23 @@
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr.clock;
+	struct clocksource *clock = tk->tkr_mono.clock;
 	cycle_t cycle_now, delta;
 	s64 nsec;
 
-	cycle_now = tk->tkr.read(clock);
-	delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-	tk->tkr.cycle_last = cycle_now;
+	cycle_now = tk->tkr_mono.read(clock);
+	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
+	tk->tkr_mono.cycle_last = cycle_now;
+	tk->tkr_raw.cycle_last  = cycle_now;
 
-	tk->tkr.xtime_nsec += delta * tk->tkr.mult;
+	tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
 
 	/* If arch requires, add in get_arch_timeoffset() */
-	tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
+	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
 
 	tk_normalize_xtime(tk);
 
-	nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
+	nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
 	timespec64_add_ns(&tk->raw_time, nsec);
 }
 
@@ -537,7 +650,7 @@
 		seq = read_seqcount_begin(&tk_core.seq);
 
 		ts->tv_sec = tk->xtime_sec;
-		nsecs = timekeeping_get_ns(&tk->tkr);
+		nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -577,8 +690,8 @@
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		base = tk->tkr.base_mono;
-		nsecs = timekeeping_get_ns(&tk->tkr);
+		base = tk->tkr_mono.base;
+		nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -603,8 +716,8 @@
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		base = ktime_add(tk->tkr.base_mono, *offset);
-		nsecs = timekeeping_get_ns(&tk->tkr);
+		base = ktime_add(tk->tkr_mono.base, *offset);
+		nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -645,8 +758,8 @@
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		base = tk->base_raw;
-		nsecs = timekeeping_get_ns_raw(tk);
+		base = tk->tkr_raw.base;
+		nsecs = timekeeping_get_ns(&tk->tkr_raw);
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -674,7 +787,7 @@
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		ts->tv_sec = tk->xtime_sec;
-		nsec = timekeeping_get_ns(&tk->tkr);
+		nsec = timekeeping_get_ns(&tk->tkr_mono);
 		tomono = tk->wall_to_monotonic;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +872,8 @@
 		ts_real->tv_sec = tk->xtime_sec;
 		ts_real->tv_nsec = 0;
 
-		nsecs_raw = timekeeping_get_ns_raw(tk);
-		nsecs_real = timekeeping_get_ns(&tk->tkr);
+		nsecs_raw  = timekeeping_get_ns(&tk->tkr_raw);
+		nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -943,7 +1056,7 @@
 	 */
 	if (try_module_get(new->owner)) {
 		if (!new->enable || new->enable(new) == 0) {
-			old = tk->tkr.clock;
+			old = tk->tkr_mono.clock;
 			tk_setup_internals(tk, new);
 			if (old->disable)
 				old->disable(old);
@@ -971,11 +1084,11 @@
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 
-	if (tk->tkr.clock == clock)
+	if (tk->tkr_mono.clock == clock)
 		return 0;
 	stop_machine(change_clocksource, clock, NULL);
 	tick_clock_notify();
-	return tk->tkr.clock == clock ? 0 : -1;
+	return tk->tkr_mono.clock == clock ? 0 : -1;
 }
 
 /**
@@ -993,7 +1106,7 @@
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		nsecs = timekeeping_get_ns_raw(tk);
+		nsecs = timekeeping_get_ns(&tk->tkr_raw);
 		ts64 = tk->raw_time;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1129,7 @@
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+		ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1035,7 +1148,7 @@
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		ret = tk->tkr.clock->max_idle_ns;
+		ret = tk->tkr_mono.clock->max_idle_ns;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1057,6 +1170,14 @@
 	ts->tv_nsec = 0;
 }
 
+void __weak read_persistent_clock64(struct timespec64 *ts64)
+{
+	struct timespec ts;
+
+	read_persistent_clock(&ts);
+	*ts64 = timespec_to_timespec64(ts);
+}
+
 /**
  * read_boot_clock -  Return time of the system start.
  *
@@ -1072,6 +1193,20 @@
 	ts->tv_nsec = 0;
 }
 
+void __weak read_boot_clock64(struct timespec64 *ts64)
+{
+	struct timespec ts;
+
+	read_boot_clock(&ts);
+	*ts64 = timespec_to_timespec64(ts);
+}
+
+/* Flag for if timekeeping_resume() has injected sleeptime */
+static bool sleeptime_injected;
+
+/* Flag for if there is a persistent clock on this platform */
+static bool persistent_clock_exists;
+
 /*
  * timekeeping_init - Initializes the clocksource and common timekeeping values
  */
@@ -1081,20 +1216,17 @@
 	struct clocksource *clock;
 	unsigned long flags;
 	struct timespec64 now, boot, tmp;
-	struct timespec ts;
 
-	read_persistent_clock(&ts);
-	now = timespec_to_timespec64(ts);
+	read_persistent_clock64(&now);
 	if (!timespec64_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
 		now.tv_nsec = 0;
 	} else if (now.tv_sec || now.tv_nsec)
-		persistent_clock_exist = true;
+		persistent_clock_exists = true;
 
-	read_boot_clock(&ts);
-	boot = timespec_to_timespec64(ts);
+	read_boot_clock64(&boot);
 	if (!timespec64_valid_strict(&boot)) {
 		pr_warn("WARNING: Boot clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
@@ -1114,7 +1246,6 @@
 	tk_set_xtime(tk, &now);
 	tk->raw_time.tv_sec = 0;
 	tk->raw_time.tv_nsec = 0;
-	tk->base_raw.tv64 = 0;
 	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
 		boot = tk_xtime(tk);
 
@@ -1127,7 +1258,7 @@
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
-/* time in seconds when suspend began */
+/* time in seconds when suspend began for persistent clock */
 static struct timespec64 timekeeping_suspend_time;
 
 /**
@@ -1152,12 +1283,49 @@
 	tk_debug_account_sleep_time(delta);
 }
 
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
+/**
+ * We have three kinds of time sources to use for sleep time
+ * injection, the preference order is:
+ * 1) non-stop clocksource
+ * 2) persistent clock (ie: RTC accessible when irqs are off)
+ * 3) RTC
+ *
+ * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
+ * If system has neither 1) nor 2), 3) will be used finally.
+ *
+ *
+ * If timekeeping has injected sleeptime via either 1) or 2),
+ * 3) becomes needless, so in this case we don't need to call
+ * rtc_resume(), and this is what timekeeping_rtc_skipresume()
+ * means.
+ */
+bool timekeeping_rtc_skipresume(void)
+{
+	return sleeptime_injected;
+}
+
+/**
+ * 1) can be determined whether to use or not only when doing
+ * timekeeping_resume() which is invoked after rtc_suspend(),
+ * so we can't skip rtc_suspend() surely if system has 1).
+ *
+ * But if system has 2), 2) will definitely be used, so in this
+ * case we don't need to call rtc_suspend(), and this is what
+ * timekeeping_rtc_skipsuspend() means.
+ */
+bool timekeeping_rtc_skipsuspend(void)
+{
+	return persistent_clock_exists;
+}
+
 /**
  * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values
  * @delta: pointer to a timespec64 delta value
  *
- * This hook is for architectures that cannot support read_persistent_clock
+ * This hook is for architectures that cannot support read_persistent_clock64
  * because their RTC/persistent clock is only accessible when irqs are enabled.
+ * and also don't have an effective nonstop clocksource.
  *
  * This function should only be called by rtc_resume(), and allows
  * a suspend offset to be injected into the timekeeping values.
@@ -1167,13 +1335,6 @@
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
 
-	/*
-	 * Make sure we don't set the clock twice, as timekeeping_resume()
-	 * already did it
-	 */
-	if (has_persistent_clock())
-		return;
-
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 
@@ -1189,26 +1350,21 @@
 	/* signal hrtimers about time change */
 	clock_was_set();
 }
+#endif
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- *
- * This is for the generic clocksource timekeeping.
- * xtime/wall_to_monotonic/jiffies/etc are
- * still managed by arch specific suspend/resume code.
  */
 void timekeeping_resume(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	struct clocksource *clock = tk->tkr.clock;
+	struct clocksource *clock = tk->tkr_mono.clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
-	struct timespec tmp;
 	cycle_t cycle_now, cycle_delta;
-	bool suspendtime_found = false;
 
-	read_persistent_clock(&tmp);
-	ts_new = timespec_to_timespec64(tmp);
+	sleeptime_injected = false;
+	read_persistent_clock64(&ts_new);
 
 	clockevents_resume();
 	clocksource_resume();
@@ -1228,16 +1384,16 @@
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr.read(clock);
+	cycle_now = tk->tkr_mono.read(clock);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-		cycle_now > tk->tkr.cycle_last) {
+		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 num, max = ULLONG_MAX;
 		u32 mult = clock->mult;
 		u32 shift = clock->shift;
 		s64 nsec = 0;
 
-		cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
-						tk->tkr.mask);
+		cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+						tk->tkr_mono.mask);
 
 		/*
 		 * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1253,17 +1409,19 @@
 		nsec += ((u64) cycle_delta * mult) >> shift;
 
 		ts_delta = ns_to_timespec64(nsec);
-		suspendtime_found = true;
+		sleeptime_injected = true;
 	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
 		ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
-		suspendtime_found = true;
+		sleeptime_injected = true;
 	}
 
-	if (suspendtime_found)
+	if (sleeptime_injected)
 		__timekeeping_inject_sleeptime(tk, &ts_delta);
 
 	/* Re-base the last cycle value */
-	tk->tkr.cycle_last = cycle_now;
+	tk->tkr_mono.cycle_last = cycle_now;
+	tk->tkr_raw.cycle_last  = cycle_now;
+
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
 	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1272,9 +1430,7 @@
 
 	touch_softlockup_watchdog();
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
-
-	/* Resume hrtimers */
+	tick_resume();
 	hrtimers_resume();
 }
 
@@ -1284,10 +1440,8 @@
 	unsigned long flags;
 	struct timespec64		delta, delta_delta;
 	static struct timespec64	old_delta;
-	struct timespec tmp;
 
-	read_persistent_clock(&tmp);
-	timekeeping_suspend_time = timespec_to_timespec64(tmp);
+	read_persistent_clock64(&timekeeping_suspend_time);
 
 	/*
 	 * On some systems the persistent_clock can not be detected at
@@ -1295,31 +1449,33 @@
 	 * value returned, update the persistent_clock_exists flag.
 	 */
 	if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
-		persistent_clock_exist = true;
+		persistent_clock_exists = true;
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
-	/*
-	 * To avoid drift caused by repeated suspend/resumes,
-	 * which each can add ~1 second drift error,
-	 * try to compensate so the difference in system time
-	 * and persistent_clock time stays close to constant.
-	 */
-	delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
-	delta_delta = timespec64_sub(delta, old_delta);
-	if (abs(delta_delta.tv_sec)  >= 2) {
+	if (persistent_clock_exists) {
 		/*
-		 * if delta_delta is too large, assume time correction
-		 * has occured and set old_delta to the current delta.
+		 * To avoid drift caused by repeated suspend/resumes,
+		 * which each can add ~1 second drift error,
+		 * try to compensate so the difference in system time
+		 * and persistent_clock time stays close to constant.
 		 */
-		old_delta = delta;
-	} else {
-		/* Otherwise try to adjust old_system to compensate */
-		timekeeping_suspend_time =
-			timespec64_add(timekeeping_suspend_time, delta_delta);
+		delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
+		delta_delta = timespec64_sub(delta, old_delta);
+		if (abs(delta_delta.tv_sec) >= 2) {
+			/*
+			 * if delta_delta is too large, assume time correction
+			 * has occurred and set old_delta to the current delta.
+			 */
+			old_delta = delta;
+		} else {
+			/* Otherwise try to adjust old_system to compensate */
+			timekeeping_suspend_time =
+				timespec64_add(timekeeping_suspend_time, delta_delta);
+		}
 	}
 
 	timekeeping_update(tk, TK_MIRROR);
@@ -1327,7 +1483,7 @@
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
-	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+	tick_suspend();
 	clocksource_suspend();
 	clockevents_suspend();
 
@@ -1416,15 +1572,15 @@
 	 *
 	 * XXX - TODO: Doc ntp_error calculation.
 	 */
-	if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
+	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
 		/* NTP adjustment caused clocksource mult overflow */
 		WARN_ON_ONCE(1);
 		return;
 	}
 
-	tk->tkr.mult += mult_adj;
+	tk->tkr_mono.mult += mult_adj;
 	tk->xtime_interval += interval;
-	tk->tkr.xtime_nsec -= offset;
+	tk->tkr_mono.xtime_nsec -= offset;
 	tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
 }
 
@@ -1486,13 +1642,13 @@
 		tk->ntp_err_mult = 0;
 	}
 
-	if (unlikely(tk->tkr.clock->maxadj &&
-		(abs(tk->tkr.mult - tk->tkr.clock->mult)
-			> tk->tkr.clock->maxadj))) {
+	if (unlikely(tk->tkr_mono.clock->maxadj &&
+		(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
+			> tk->tkr_mono.clock->maxadj))) {
 		printk_once(KERN_WARNING
 			"Adjusting %s more than 11%% (%ld vs %ld)\n",
-			tk->tkr.clock->name, (long)tk->tkr.mult,
-			(long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+			tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
+			(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
 	}
 
 	/*
@@ -1509,9 +1665,9 @@
 	 * We'll correct this error next time through this function, when
 	 * xtime_nsec is not as small.
 	 */
-	if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
-		s64 neg = -(s64)tk->tkr.xtime_nsec;
-		tk->tkr.xtime_nsec = 0;
+	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
+		s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
+		tk->tkr_mono.xtime_nsec = 0;
 		tk->ntp_error += neg << tk->ntp_error_shift;
 	}
 }
@@ -1526,13 +1682,13 @@
  */
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
-	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
+	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
 	unsigned int clock_set = 0;
 
-	while (tk->tkr.xtime_nsec >= nsecps) {
+	while (tk->tkr_mono.xtime_nsec >= nsecps) {
 		int leap;
 
-		tk->tkr.xtime_nsec -= nsecps;
+		tk->tkr_mono.xtime_nsec -= nsecps;
 		tk->xtime_sec++;
 
 		/* Figure out if its a leap sec and apply if needed */
@@ -1577,9 +1733,10 @@
 
 	/* Accumulate one shifted interval */
 	offset -= interval;
-	tk->tkr.cycle_last += interval;
+	tk->tkr_mono.cycle_last += interval;
+	tk->tkr_raw.cycle_last  += interval;
 
-	tk->tkr.xtime_nsec += tk->xtime_interval << shift;
+	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
@@ -1622,14 +1779,17 @@
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
-				   tk->tkr.cycle_last, tk->tkr.mask);
+	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
 	/* Check if there's really nothing to do */
 	if (offset < real_tk->cycle_interval)
 		goto out;
 
+	/* Do some additional sanity checking */
+	timekeeping_check_update(real_tk, offset);
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1944,8 @@
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		base = tk->tkr.base_mono;
-		nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+		base = tk->tkr_mono.base;
+		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
 		*offs_real = tk->offs_real;
 		*offs_boot = tk->offs_boot;
@@ -1816,8 +1976,8 @@
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		base = tk->tkr.base_mono;
-		nsecs = timekeeping_get_ns(&tk->tkr);
+		base = tk->tkr_mono.base;
+		nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
 		*offs_real = tk->offs_real;
 		*offs_boot = tk->offs_boot;
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 1d91416..ead8794 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -19,4 +19,11 @@
 extern int timekeeping_suspend(void);
 extern void timekeeping_resume(void);
 
+extern void do_timer(unsigned long ticks);
+extern void update_wall_time(void);
+
+extern seqlock_t jiffies_lock;
+
+#define CS_NAME_LEN	32
+
 #endif
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d3f5c5..2ece3aa 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -90,8 +90,18 @@
 	struct tvec tv5;
 } ____cacheline_aligned;
 
+/*
+ * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
+ * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
+ * pointer to per-cpu entries because we don't know where we'll map the section,
+ * even for the boot cpu.
+ *
+ * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
+ * rest of them.
+ */
 struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
+
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /* Functions below help us manage 'deferrable' flag */
@@ -1027,6 +1037,8 @@
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1532,64 +1544,6 @@
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
-static int init_timers_cpu(int cpu)
-{
-	int j;
-	struct tvec_base *base;
-	static char tvec_base_done[NR_CPUS];
-
-	if (!tvec_base_done[cpu]) {
-		static char boot_done;
-
-		if (boot_done) {
-			/*
-			 * The APs use this path later in boot
-			 */
-			base = kzalloc_node(sizeof(*base), GFP_KERNEL,
-					    cpu_to_node(cpu));
-			if (!base)
-				return -ENOMEM;
-
-			/* Make sure tvec_base has TIMER_FLAG_MASK bits free */
-			if (WARN_ON(base != tbase_get_base(base))) {
-				kfree(base);
-				return -ENOMEM;
-			}
-			per_cpu(tvec_bases, cpu) = base;
-		} else {
-			/*
-			 * This is for the boot CPU - we use compile-time
-			 * static initialisation because per-cpu memory isn't
-			 * ready yet and because the memory allocators are not
-			 * initialised either.
-			 */
-			boot_done = 1;
-			base = &boot_tvec_bases;
-		}
-		spin_lock_init(&base->lock);
-		tvec_base_done[cpu] = 1;
-		base->cpu = cpu;
-	} else {
-		base = per_cpu(tvec_bases, cpu);
-	}
-
-
-	for (j = 0; j < TVN_SIZE; j++) {
-		INIT_LIST_HEAD(base->tv5.vec + j);
-		INIT_LIST_HEAD(base->tv4.vec + j);
-		INIT_LIST_HEAD(base->tv3.vec + j);
-		INIT_LIST_HEAD(base->tv2.vec + j);
-	}
-	for (j = 0; j < TVR_SIZE; j++)
-		INIT_LIST_HEAD(base->tv1.vec + j);
-
-	base->timer_jiffies = jiffies;
-	base->next_timer = base->timer_jiffies;
-	base->active_timers = 0;
-	base->all_timers = 0;
-	return 0;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
 {
@@ -1631,55 +1585,86 @@
 		migrate_timer_list(new_base, old_base->tv5.vec + i);
 	}
 
+	old_base->active_timers = 0;
+	old_base->all_timers = 0;
+
 	spin_unlock(&old_base->lock);
 	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(tvec_bases);
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static int timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
-	long cpu = (long)hcpu;
-	int err;
-
-	switch(action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		err = init_timers_cpu(cpu);
-		if (err < 0)
-			return notifier_from_errno(err);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
+	switch (action) {
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		migrate_timers(cpu);
+		migrate_timers((long)hcpu);
 		break;
-#endif
 	default:
 		break;
 	}
+
 	return NOTIFY_OK;
 }
 
-static struct notifier_block timers_nb = {
-	.notifier_call	= timer_cpu_notify,
-};
+static inline void timer_register_cpu_notifier(void)
+{
+	cpu_notifier(timer_cpu_notify, 0);
+}
+#else
+static inline void timer_register_cpu_notifier(void) { }
+#endif /* CONFIG_HOTPLUG_CPU */
 
+static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+{
+	int j;
+
+	BUG_ON(base != tbase_get_base(base));
+
+	base->cpu = cpu;
+	per_cpu(tvec_bases, cpu) = base;
+	spin_lock_init(&base->lock);
+
+	for (j = 0; j < TVN_SIZE; j++) {
+		INIT_LIST_HEAD(base->tv5.vec + j);
+		INIT_LIST_HEAD(base->tv4.vec + j);
+		INIT_LIST_HEAD(base->tv3.vec + j);
+		INIT_LIST_HEAD(base->tv2.vec + j);
+	}
+	for (j = 0; j < TVR_SIZE; j++)
+		INIT_LIST_HEAD(base->tv1.vec + j);
+
+	base->timer_jiffies = jiffies;
+	base->next_timer = base->timer_jiffies;
+}
+
+static void __init init_timer_cpus(void)
+{
+	struct tvec_base *base;
+	int local_cpu = smp_processor_id();
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == local_cpu)
+			base = &boot_tvec_bases;
+#ifdef CONFIG_SMP
+		else
+			base = per_cpu_ptr(&__tvec_bases, cpu);
+#endif
+
+		init_timer_cpu(base, cpu);
+	}
+}
 
 void __init init_timers(void)
 {
-	int err;
-
 	/* ensure there are enough low bits for flags in timer->base pointer */
 	BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
 
-	err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
-			       (void *)(long)smp_processor_id());
-	BUG_ON(err != NOTIFY_OK);
-
+	init_timer_cpus();
 	init_timer_stats();
-	register_cpu_notifier(&timers_nb);
+	timer_register_cpu_notifier();
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 61ed862..e878c2e 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -16,10 +16,10 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
-#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 
+#include "tick-internal.h"
 
 struct timer_list_iter {
 	int cpu;
@@ -228,9 +228,35 @@
 	print_name_offset(m, dev->set_next_event);
 	SEQ_printf(m, "\n");
 
-	SEQ_printf(m, " set_mode:       ");
-	print_name_offset(m, dev->set_mode);
-	SEQ_printf(m, "\n");
+	if (dev->set_mode) {
+		SEQ_printf(m, " set_mode:       ");
+		print_name_offset(m, dev->set_mode);
+		SEQ_printf(m, "\n");
+	} else {
+		if (dev->set_state_shutdown) {
+			SEQ_printf(m, " shutdown: ");
+			print_name_offset(m, dev->set_state_shutdown);
+			SEQ_printf(m, "\n");
+		}
+
+		if (dev->set_state_periodic) {
+			SEQ_printf(m, " periodic: ");
+			print_name_offset(m, dev->set_state_periodic);
+			SEQ_printf(m, "\n");
+		}
+
+		if (dev->set_state_oneshot) {
+			SEQ_printf(m, " oneshot:  ");
+			print_name_offset(m, dev->set_state_oneshot);
+			SEQ_printf(m, "\n");
+		}
+
+		if (dev->tick_resume) {
+			SEQ_printf(m, " resume:   ");
+			print_name_offset(m, dev->tick_resume);
+			SEQ_printf(m, "\n");
+		}
+	}
 
 	SEQ_printf(m, " event_handler:  ");
 	print_name_offset(m, dev->event_handler);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 41ff75b..586ad91 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -159,6 +159,7 @@
 
 	/* see manage_workers() for details on the two manager mutexes */
 	struct mutex		manager_arb;	/* manager arbitration */
+	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
 	struct completion	*detach_completion; /* all workers detached */
@@ -230,7 +231,7 @@
  */
 struct workqueue_struct {
 	struct list_head	pwqs;		/* WR: all pwqs of this wq */
-	struct list_head	list;		/* PL: list of all workqueues */
+	struct list_head	list;		/* PR: list of all workqueues */
 
 	struct mutex		mutex;		/* protects this wq */
 	int			work_color;	/* WQ: current work color */
@@ -257,6 +258,13 @@
 #endif
 	char			name[WQ_NAME_LEN]; /* I: workqueue name */
 
+	/*
+	 * Destruction of workqueue_struct is sched-RCU protected to allow
+	 * walking the workqueues list without grabbing wq_pool_mutex.
+	 * This is used to dump all workqueues from sysrq.
+	 */
+	struct rcu_head		rcu;
+
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
 	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
@@ -288,7 +296,7 @@
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
-static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
+static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
 /* the per-cpu worker pools */
@@ -324,6 +332,7 @@
 static int worker_thread(void *__worker);
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
 				 const struct workqueue_attrs *from);
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -1911,9 +1920,11 @@
 	 */
 	if (!mutex_trylock(&pool->manager_arb))
 		return false;
+	pool->manager = worker;
 
 	maybe_create_worker(pool);
 
+	pool->manager = NULL;
 	mutex_unlock(&pool->manager_arb);
 	return true;
 }
@@ -2303,6 +2314,7 @@
 struct wq_barrier {
 	struct work_struct	work;
 	struct completion	done;
+	struct task_struct	*task;	/* purely informational */
 };
 
 static void wq_barrier_func(struct work_struct *work)
@@ -2351,6 +2363,7 @@
 	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	init_completion(&barr->done);
+	barr->task = current;
 
 	/*
 	 * If @target is currently being executed, schedule the
@@ -2989,323 +3002,6 @@
 }
 EXPORT_SYMBOL_GPL(execute_in_process_context);
 
-#ifdef CONFIG_SYSFS
-/*
- * Workqueues with WQ_SYSFS flag set is visible to userland via
- * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the
- * following attributes.
- *
- *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
- *  max_active	RW int	: maximum number of in-flight work items
- *
- * Unbound workqueues have the following extra attributes.
- *
- *  id		RO int	: the associated pool ID
- *  nice	RW int	: nice value of the workers
- *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
- */
-struct wq_device {
-	struct workqueue_struct		*wq;
-	struct device			dev;
-};
-
-static struct workqueue_struct *dev_to_wq(struct device *dev)
-{
-	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
-
-	return wq_dev->wq;
-}
-
-static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
-}
-static DEVICE_ATTR_RO(per_cpu);
-
-static ssize_t max_active_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
-}
-
-static ssize_t max_active_store(struct device *dev,
-				struct device_attribute *attr, const char *buf,
-				size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int val;
-
-	if (sscanf(buf, "%d", &val) != 1 || val <= 0)
-		return -EINVAL;
-
-	workqueue_set_max_active(wq, val);
-	return count;
-}
-static DEVICE_ATTR_RW(max_active);
-
-static struct attribute *wq_sysfs_attrs[] = {
-	&dev_attr_per_cpu.attr,
-	&dev_attr_max_active.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(wq_sysfs);
-
-static ssize_t wq_pool_ids_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	const char *delim = "";
-	int node, written = 0;
-
-	rcu_read_lock_sched();
-	for_each_node(node) {
-		written += scnprintf(buf + written, PAGE_SIZE - written,
-				     "%s%d:%d", delim, node,
-				     unbound_pwq_by_node(wq, node)->pool->id);
-		delim = " ";
-	}
-	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
-	rcu_read_unlock_sched();
-
-	return written;
-}
-
-static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
-	mutex_unlock(&wq->mutex);
-
-	return written;
-}
-
-/* prepare workqueue_attrs for sysfs store operations */
-static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
-{
-	struct workqueue_attrs *attrs;
-
-	attrs = alloc_workqueue_attrs(GFP_KERNEL);
-	if (!attrs)
-		return NULL;
-
-	mutex_lock(&wq->mutex);
-	copy_workqueue_attrs(attrs, wq->unbound_attrs);
-	mutex_unlock(&wq->mutex);
-	return attrs;
-}
-
-static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
-	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
-		ret = apply_workqueue_attrs(wq, attrs);
-	else
-		ret = -EINVAL;
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static ssize_t wq_cpumask_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
-			    cpumask_pr_args(wq->unbound_attrs->cpumask));
-	mutex_unlock(&wq->mutex);
-	return written;
-}
-
-static ssize_t wq_cpumask_store(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	ret = cpumask_parse(buf, attrs->cpumask);
-	if (!ret)
-		ret = apply_workqueue_attrs(wq, attrs);
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%d\n",
-			    !wq->unbound_attrs->no_numa);
-	mutex_unlock(&wq->mutex);
-
-	return written;
-}
-
-static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int v, ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	ret = -EINVAL;
-	if (sscanf(buf, "%d", &v) == 1) {
-		attrs->no_numa = !v;
-		ret = apply_workqueue_attrs(wq, attrs);
-	}
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static struct device_attribute wq_sysfs_unbound_attrs[] = {
-	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
-	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
-	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
-	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
-	__ATTR_NULL,
-};
-
-static struct bus_type wq_subsys = {
-	.name				= "workqueue",
-	.dev_groups			= wq_sysfs_groups,
-};
-
-static int __init wq_sysfs_init(void)
-{
-	return subsys_virtual_register(&wq_subsys, NULL);
-}
-core_initcall(wq_sysfs_init);
-
-static void wq_device_release(struct device *dev)
-{
-	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
-
-	kfree(wq_dev);
-}
-
-/**
- * workqueue_sysfs_register - make a workqueue visible in sysfs
- * @wq: the workqueue to register
- *
- * Expose @wq in sysfs under /sys/bus/workqueue/devices.
- * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
- * which is the preferred method.
- *
- * Workqueue user should use this function directly iff it wants to apply
- * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
- * apply_workqueue_attrs() may race against userland updating the
- * attributes.
- *
- * Return: 0 on success, -errno on failure.
- */
-int workqueue_sysfs_register(struct workqueue_struct *wq)
-{
-	struct wq_device *wq_dev;
-	int ret;
-
-	/*
-	 * Adjusting max_active or creating new pwqs by applyting
-	 * attributes breaks ordering guarantee.  Disallow exposing ordered
-	 * workqueues.
-	 */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
-		return -EINVAL;
-
-	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
-	if (!wq_dev)
-		return -ENOMEM;
-
-	wq_dev->wq = wq;
-	wq_dev->dev.bus = &wq_subsys;
-	wq_dev->dev.init_name = wq->name;
-	wq_dev->dev.release = wq_device_release;
-
-	/*
-	 * unbound_attrs are created separately.  Suppress uevent until
-	 * everything is ready.
-	 */
-	dev_set_uevent_suppress(&wq_dev->dev, true);
-
-	ret = device_register(&wq_dev->dev);
-	if (ret) {
-		kfree(wq_dev);
-		wq->wq_dev = NULL;
-		return ret;
-	}
-
-	if (wq->flags & WQ_UNBOUND) {
-		struct device_attribute *attr;
-
-		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
-			ret = device_create_file(&wq_dev->dev, attr);
-			if (ret) {
-				device_unregister(&wq_dev->dev);
-				wq->wq_dev = NULL;
-				return ret;
-			}
-		}
-	}
-
-	dev_set_uevent_suppress(&wq_dev->dev, false);
-	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
-	return 0;
-}
-
-/**
- * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
- * @wq: the workqueue to unregister
- *
- * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
- */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
-{
-	struct wq_device *wq_dev = wq->wq_dev;
-
-	if (!wq->wq_dev)
-		return;
-
-	wq->wq_dev = NULL;
-	device_unregister(&wq_dev->dev);
-}
-#else	/* CONFIG_SYSFS */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
-#endif	/* CONFIG_SYSFS */
-
 /**
  * free_workqueue_attrs - free a workqueue_attrs
  * @attrs: workqueue_attrs to free
@@ -3424,6 +3120,20 @@
 	return 0;
 }
 
+static void rcu_free_wq(struct rcu_head *rcu)
+{
+	struct workqueue_struct *wq =
+		container_of(rcu, struct workqueue_struct, rcu);
+
+	if (!(wq->flags & WQ_UNBOUND))
+		free_percpu(wq->cpu_pwqs);
+	else
+		free_workqueue_attrs(wq->unbound_attrs);
+
+	kfree(wq->rescuer);
+	kfree(wq);
+}
+
 static void rcu_free_pool(struct rcu_head *rcu)
 {
 	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
@@ -3601,12 +3311,10 @@
 
 	/*
 	 * If we're the last pwq going away, @wq is already dead and no one
-	 * is gonna access it anymore.  Free it.
+	 * is gonna access it anymore.  Schedule RCU free.
 	 */
-	if (is_last) {
-		free_workqueue_attrs(wq->unbound_attrs);
-		kfree(wq);
-	}
+	if (is_last)
+		call_rcu_sched(&wq->rcu, rcu_free_wq);
 }
 
 /**
@@ -4143,7 +3851,7 @@
 		pwq_adjust_max_active(pwq);
 	mutex_unlock(&wq->mutex);
 
-	list_add(&wq->list, &workqueues);
+	list_add_tail_rcu(&wq->list, &workqueues);
 
 	mutex_unlock(&wq_pool_mutex);
 
@@ -4199,24 +3907,20 @@
 	 * flushing is complete in case freeze races us.
 	 */
 	mutex_lock(&wq_pool_mutex);
-	list_del_init(&wq->list);
+	list_del_rcu(&wq->list);
 	mutex_unlock(&wq_pool_mutex);
 
 	workqueue_sysfs_unregister(wq);
 
-	if (wq->rescuer) {
+	if (wq->rescuer)
 		kthread_stop(wq->rescuer->task);
-		kfree(wq->rescuer);
-		wq->rescuer = NULL;
-	}
 
 	if (!(wq->flags & WQ_UNBOUND)) {
 		/*
 		 * The base ref is never dropped on per-cpu pwqs.  Directly
-		 * free the pwqs and wq.
+		 * schedule RCU free.
 		 */
-		free_percpu(wq->cpu_pwqs);
-		kfree(wq);
+		call_rcu_sched(&wq->rcu, rcu_free_wq);
 	} else {
 		/*
 		 * We're the sole accessor of @wq at this point.  Directly
@@ -4437,6 +4141,166 @@
 	}
 }
 
+static void pr_cont_pool_info(struct worker_pool *pool)
+{
+	pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
+	if (pool->node != NUMA_NO_NODE)
+		pr_cont(" node=%d", pool->node);
+	pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
+}
+
+static void pr_cont_work(bool comma, struct work_struct *work)
+{
+	if (work->func == wq_barrier_func) {
+		struct wq_barrier *barr;
+
+		barr = container_of(work, struct wq_barrier, work);
+
+		pr_cont("%s BAR(%d)", comma ? "," : "",
+			task_pid_nr(barr->task));
+	} else {
+		pr_cont("%s %pf", comma ? "," : "", work->func);
+	}
+}
+
+static void show_pwq(struct pool_workqueue *pwq)
+{
+	struct worker_pool *pool = pwq->pool;
+	struct work_struct *work;
+	struct worker *worker;
+	bool has_in_flight = false, has_pending = false;
+	int bkt;
+
+	pr_info("  pwq %d:", pool->id);
+	pr_cont_pool_info(pool);
+
+	pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
+		!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+
+	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+		if (worker->current_pwq == pwq) {
+			has_in_flight = true;
+			break;
+		}
+	}
+	if (has_in_flight) {
+		bool comma = false;
+
+		pr_info("    in-flight:");
+		hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+			if (worker->current_pwq != pwq)
+				continue;
+
+			pr_cont("%s %d%s:%pf", comma ? "," : "",
+				task_pid_nr(worker->task),
+				worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+				worker->current_func);
+			list_for_each_entry(work, &worker->scheduled, entry)
+				pr_cont_work(false, work);
+			comma = true;
+		}
+		pr_cont("\n");
+	}
+
+	list_for_each_entry(work, &pool->worklist, entry) {
+		if (get_work_pwq(work) == pwq) {
+			has_pending = true;
+			break;
+		}
+	}
+	if (has_pending) {
+		bool comma = false;
+
+		pr_info("    pending:");
+		list_for_each_entry(work, &pool->worklist, entry) {
+			if (get_work_pwq(work) != pwq)
+				continue;
+
+			pr_cont_work(comma, work);
+			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+		}
+		pr_cont("\n");
+	}
+
+	if (!list_empty(&pwq->delayed_works)) {
+		bool comma = false;
+
+		pr_info("    delayed:");
+		list_for_each_entry(work, &pwq->delayed_works, entry) {
+			pr_cont_work(comma, work);
+			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+		}
+		pr_cont("\n");
+	}
+}
+
+/**
+ * show_workqueue_state - dump workqueue state
+ *
+ * Called from a sysrq handler and prints out all busy workqueues and
+ * pools.
+ */
+void show_workqueue_state(void)
+{
+	struct workqueue_struct *wq;
+	struct worker_pool *pool;
+	unsigned long flags;
+	int pi;
+
+	rcu_read_lock_sched();
+
+	pr_info("Showing busy workqueues and worker pools:\n");
+
+	list_for_each_entry_rcu(wq, &workqueues, list) {
+		struct pool_workqueue *pwq;
+		bool idle = true;
+
+		for_each_pwq(pwq, wq) {
+			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+				idle = false;
+				break;
+			}
+		}
+		if (idle)
+			continue;
+
+		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+		for_each_pwq(pwq, wq) {
+			spin_lock_irqsave(&pwq->pool->lock, flags);
+			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+				show_pwq(pwq);
+			spin_unlock_irqrestore(&pwq->pool->lock, flags);
+		}
+	}
+
+	for_each_pool(pool, pi) {
+		struct worker *worker;
+		bool first = true;
+
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->nr_workers == pool->nr_idle)
+			goto next_pool;
+
+		pr_info("pool %d:", pool->id);
+		pr_cont_pool_info(pool);
+		pr_cont(" workers=%d", pool->nr_workers);
+		if (pool->manager)
+			pr_cont(" manager: %d",
+				task_pid_nr(pool->manager->task));
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			pr_cont(" %s%d", first ? "idle: " : "",
+				task_pid_nr(worker->task));
+			first = false;
+		}
+		pr_cont("\n");
+	next_pool:
+		spin_unlock_irqrestore(&pool->lock, flags);
+	}
+
+	rcu_read_unlock_sched();
+}
+
 /*
  * CPU hotplug.
  *
@@ -4834,6 +4698,323 @@
 }
 #endif /* CONFIG_FREEZER */
 
+#ifdef CONFIG_SYSFS
+/*
+ * Workqueues with WQ_SYSFS flag set is visible to userland via
+ * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the
+ * following attributes.
+ *
+ *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
+ *  max_active	RW int	: maximum number of in-flight work items
+ *
+ * Unbound workqueues have the following extra attributes.
+ *
+ *  id		RO int	: the associated pool ID
+ *  nice	RW int	: nice value of the workers
+ *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
+ */
+struct wq_device {
+	struct workqueue_struct		*wq;
+	struct device			dev;
+};
+
+static struct workqueue_struct *dev_to_wq(struct device *dev)
+{
+	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+
+	return wq_dev->wq;
+}
+
+static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
+}
+static DEVICE_ATTR_RO(per_cpu);
+
+static ssize_t max_active_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
+}
+
+static ssize_t max_active_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) != 1 || val <= 0)
+		return -EINVAL;
+
+	workqueue_set_max_active(wq, val);
+	return count;
+}
+static DEVICE_ATTR_RW(max_active);
+
+static struct attribute *wq_sysfs_attrs[] = {
+	&dev_attr_per_cpu.attr,
+	&dev_attr_max_active.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wq_sysfs);
+
+static ssize_t wq_pool_ids_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	const char *delim = "";
+	int node, written = 0;
+
+	rcu_read_lock_sched();
+	for_each_node(node) {
+		written += scnprintf(buf + written, PAGE_SIZE - written,
+				     "%s%d:%d", delim, node,
+				     unbound_pwq_by_node(wq, node)->pool->id);
+		delim = " ";
+	}
+	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
+	rcu_read_unlock_sched();
+
+	return written;
+}
+
+static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+/* prepare workqueue_attrs for sysfs store operations */
+static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
+{
+	struct workqueue_attrs *attrs;
+
+	attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	if (!attrs)
+		return NULL;
+
+	mutex_lock(&wq->mutex);
+	copy_workqueue_attrs(attrs, wq->unbound_attrs);
+	mutex_unlock(&wq->mutex);
+	return attrs;
+}
+
+static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
+	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
+		ret = apply_workqueue_attrs(wq, attrs);
+	else
+		ret = -EINVAL;
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static ssize_t wq_cpumask_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
+			    cpumask_pr_args(wq->unbound_attrs->cpumask));
+	mutex_unlock(&wq->mutex);
+	return written;
+}
+
+static ssize_t wq_cpumask_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	ret = cpumask_parse(buf, attrs->cpumask);
+	if (!ret)
+		ret = apply_workqueue_attrs(wq, attrs);
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%d\n",
+			    !wq->unbound_attrs->no_numa);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int v, ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	ret = -EINVAL;
+	if (sscanf(buf, "%d", &v) == 1) {
+		attrs->no_numa = !v;
+		ret = apply_workqueue_attrs(wq, attrs);
+	}
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static struct device_attribute wq_sysfs_unbound_attrs[] = {
+	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
+	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
+	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
+	__ATTR_NULL,
+};
+
+static struct bus_type wq_subsys = {
+	.name				= "workqueue",
+	.dev_groups			= wq_sysfs_groups,
+};
+
+static int __init wq_sysfs_init(void)
+{
+	return subsys_virtual_register(&wq_subsys, NULL);
+}
+core_initcall(wq_sysfs_init);
+
+static void wq_device_release(struct device *dev)
+{
+	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+
+	kfree(wq_dev);
+}
+
+/**
+ * workqueue_sysfs_register - make a workqueue visible in sysfs
+ * @wq: the workqueue to register
+ *
+ * Expose @wq in sysfs under /sys/bus/workqueue/devices.
+ * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
+ * which is the preferred method.
+ *
+ * Workqueue user should use this function directly iff it wants to apply
+ * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
+ * apply_workqueue_attrs() may race against userland updating the
+ * attributes.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int workqueue_sysfs_register(struct workqueue_struct *wq)
+{
+	struct wq_device *wq_dev;
+	int ret;
+
+	/*
+	 * Adjusting max_active or creating new pwqs by applyting
+	 * attributes breaks ordering guarantee.  Disallow exposing ordered
+	 * workqueues.
+	 */
+	if (WARN_ON(wq->flags & __WQ_ORDERED))
+		return -EINVAL;
+
+	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
+	if (!wq_dev)
+		return -ENOMEM;
+
+	wq_dev->wq = wq;
+	wq_dev->dev.bus = &wq_subsys;
+	wq_dev->dev.init_name = wq->name;
+	wq_dev->dev.release = wq_device_release;
+
+	/*
+	 * unbound_attrs are created separately.  Suppress uevent until
+	 * everything is ready.
+	 */
+	dev_set_uevent_suppress(&wq_dev->dev, true);
+
+	ret = device_register(&wq_dev->dev);
+	if (ret) {
+		kfree(wq_dev);
+		wq->wq_dev = NULL;
+		return ret;
+	}
+
+	if (wq->flags & WQ_UNBOUND) {
+		struct device_attribute *attr;
+
+		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
+			ret = device_create_file(&wq_dev->dev, attr);
+			if (ret) {
+				device_unregister(&wq_dev->dev);
+				wq->wq_dev = NULL;
+				return ret;
+			}
+		}
+	}
+
+	dev_set_uevent_suppress(&wq_dev->dev, false);
+	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
+	return 0;
+}
+
+/**
+ * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
+ * @wq: the workqueue to unregister
+ *
+ * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
+ */
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
+{
+	struct wq_device *wq_dev = wq->wq_dev;
+
+	if (!wq->wq_dev)
+		return;
+
+	wq->wq_dev = NULL;
+	device_unregister(&wq_dev->dev);
+}
+#else	/* CONFIG_SYSFS */
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
+#endif	/* CONFIG_SYSFS */
+
 static void __init wq_numa_init(void)
 {
 	cpumask_var_t *tbl;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3..36b6fa8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@
 	  data corruption or a sporadic crash at a later stage once the region
 	  is examined. The runtime overhead introduced is minimal.
 
+config DEBUG_TIMEKEEPING
+	bool "Enable extra timekeeping sanity checking"
+	help
+	  This option will enable additional timekeeping sanity checks
+	  which may be helpful when diagnosing issues where timekeeping
+	  problems are suspected.
+
+	  This may include checks in the timekeeping hotpaths, so this
+	  option may have a (very small) performance impact to some
+	  workloads.
+
+	  If unsure, say N.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/lib/lockref.c b/lib/lockref.c
index ecb9a66..494994b 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -18,7 +18,7 @@
 #define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
 	struct lockref old;							\
 	BUILD_BUG_ON(sizeof(old) != 8);						\
-	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	old.lock_count = READ_ONCE(lockref->lock_count);			\
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {  	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
diff --git a/mm/mremap.c b/mm/mremap.c
index 57dadc0..2dc44b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -286,8 +286,14 @@
 		old_len = new_len;
 		old_addr = new_addr;
 		new_addr = -ENOMEM;
-	} else if (vma->vm_file && vma->vm_file->f_op->mremap)
-		vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+	} else if (vma->vm_file && vma->vm_file->f_op->mremap) {
+		err = vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+		if (err < 0) {
+			move_page_tables(new_vma, new_addr, vma, old_addr,
+					 moved_len, true);
+			return err;
+		}
+	}
 
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT) {
diff --git a/mm/percpu.c b/mm/percpu.c
index 73c97a5..dfd0248 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1310,7 +1310,7 @@
  * and, from the second one, the backing allocator (currently either vm or
  * km) provides translation.
  *
- * The addr can be tranlated simply without checking if it falls into the
+ * The addr can be translated simply without checking if it falls into the
  * first chunk. But the current code reflects better how percpu allocator
  * actually works, and the verification can discover both bugs in percpu
  * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
@@ -1762,7 +1762,7 @@
  * and other parameters considering needed percpu size, allocation
  * atom size and distances between CPUs.
  *
- * Groups are always mutliples of atom size and CPUs which are of
+ * Groups are always multiples of atom size and CPUs which are of
  * LOCAL_DISTANCE both ways are grouped together and share space for
  * units in the same group.  The returned configuration is guaranteed
  * to have CPUs on different nodes on different groups and >=75% usage
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 6b3f54e..a9f4ae4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -484,7 +484,7 @@
 			       IPPROTO_TCP, &sock);
 	if (ret)
 		return ret;
-	sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
+	sock->sk->sk_allocation = GFP_NOFS;
 
 #ifdef CONFIG_LOCKDEP
 	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -520,8 +520,6 @@
 			       ret);
 	}
 
-	sk_set_memalloc(sock->sk);
-
 	con->sock = sock;
 	return 0;
 }
@@ -2808,11 +2806,8 @@
 {
 	struct ceph_connection *con = container_of(work, struct ceph_connection,
 						   work.work);
-	unsigned long pflags = current->flags;
 	bool fault;
 
-	current->flags |= PF_MEMALLOC;
-
 	mutex_lock(&con->mutex);
 	while (true) {
 		int ret;
@@ -2866,8 +2861,6 @@
 		con_fault_finish(con);
 
 	con->ops->put(con);
-
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 }
 
 /*
diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c
index a422aaa..9ee25a6 100644
--- a/sound/firewire/bebob/bebob_maudio.c
+++ b/sound/firewire/bebob/bebob_maudio.c
@@ -96,10 +96,10 @@
 	struct fw_device *device = fw_parent_device(unit);
 	int err, rcode;
 	u64 date;
-	__be32 cues[3] = {
-		MAUDIO_BOOTLOADER_CUE1,
-		MAUDIO_BOOTLOADER_CUE2,
-		MAUDIO_BOOTLOADER_CUE3
+	__le32 cues[3] = {
+		cpu_to_le32(MAUDIO_BOOTLOADER_CUE1),
+		cpu_to_le32(MAUDIO_BOOTLOADER_CUE2),
+		cpu_to_le32(MAUDIO_BOOTLOADER_CUE3)
 	};
 
 	/* check date of software used to build */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7438213..f9d12c0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2912,6 +2912,8 @@
 
 	if (!hp_pin)
 		return;
+
+	msleep(30);
 	hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
 
 	/* Index 0x43 Direct Drive HP AMP LPM Control 1 */
@@ -3607,6 +3609,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0233:
@@ -3662,6 +3665,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_write_coef_idx(codec, 0x45, 0xc489);
 		snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
 		alc_process_coef_fw(codec, coef0255);
@@ -3731,6 +3735,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0233:
@@ -3785,6 +3790,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0233:
@@ -3839,6 +3845,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0233:
@@ -3884,6 +3891,7 @@
 
 	switch (codec->vendor_id) {
 	case 0x10ec0255:
+	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		msleep(300);
 		val = alc_read_coef_idx(codec, 0x46);
@@ -4364,6 +4372,7 @@
 	ALC269_FIXUP_QUANTA_MUTE,
 	ALC269_FIXUP_LIFEBOOK,
 	ALC269_FIXUP_LIFEBOOK_EXTMIC,
+	ALC269_FIXUP_LIFEBOOK_HP_PIN,
 	ALC269_FIXUP_AMIC,
 	ALC269_FIXUP_DMIC,
 	ALC269VB_FIXUP_AMIC,
@@ -4517,6 +4526,13 @@
 			{ }
 		},
 	},
+	[ALC269_FIXUP_LIFEBOOK_HP_PIN] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x21, 0x0221102f }, /* HP out */
+			{ }
+		},
+	},
 	[ALC269_FIXUP_AMIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -5010,6 +5026,7 @@
 	SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
 	SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
 	SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+	SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
 	SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
 	SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
 	SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
@@ -5217,6 +5234,16 @@
 		{0x17, 0x40000000},
 		{0x1d, 0x40700001},
 		{0x21, 0x02211050}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60140},
+		{0x13, 0x40000000},
+		{0x14, 0x90170110},
+		{0x19, 0x411111f0},
+		{0x1a, 0x411111f0},
+		{0x1b, 0x411111f0},
+		{0x1d, 0x40700001},
+		{0x1e, 0x411111f0},
+		{0x21, 0x02211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
 		{0x12, 0x90a60130},
 		{0x13, 0x40000000},
diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c
index 9974f20..474cae8 100644
--- a/sound/soc/codecs/pcm512x.c
+++ b/sound/soc/codecs/pcm512x.c
@@ -1156,25 +1156,6 @@
 				ret, pcm512x->pll_out);
 			return ret;
 		}
-
-		gpio = PCM512x_G1OE << (4 - 1);
-		ret = regmap_update_bits(pcm512x->regmap, PCM512x_GPIO_EN,
-					 gpio, gpio);
-		if (ret != 0) {
-			dev_err(codec->dev, "Failed to enable gpio %d: %d\n",
-				4, ret);
-			return ret;
-		}
-
-		gpio = PCM512x_GPIO_OUTPUT_1 + 4 - 1;
-		ret = regmap_update_bits(pcm512x->regmap, gpio,
-					 PCM512x_GxSL, PCM512x_GxSL_PLLLK);
-		if (ret != 0) {
-			dev_err(codec->dev,
-				"Failed to output pll lock on %d: %d\n",
-				ret, 4);
-			return ret;
-		}
 	}
 
 	ret = regmap_update_bits(pcm512x->regmap, PCM512x_SYNCHRONIZE,
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index dc9df00..337c317 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -192,6 +192,7 @@
 	{ USB_ID(0x041e, 0x3040), 2, 2, 6, 6,  2,  0x6e91 }, /* Live! 24-bit */
 	{ USB_ID(0x041e, 0x3042), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 */
 	{ USB_ID(0x041e, 0x30df), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
+	{ USB_ID(0x041e, 0x3237), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
 	{ USB_ID(0x041e, 0x3048), 2, 2, 6, 6,  2,  0x6e91 }, /* Toshiba SB0500 */
 };
 
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 753a47d..9a28365 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1113,8 +1113,13 @@
 
 bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 {
-	/* MS Lifecam HD-5000 doesn't support reading the sample rate. */
-	return chip->usb_id == USB_ID(0x045E, 0x076D);
+	/* devices which do not support reading the sample rate. */
+	switch (chip->usb_id) {
+	case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+		return true;
+	}
+	return false;
 }
 
 /* Marantz/Denon USB DACs need a vendor cmd to switch
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d66ab79..8c0c1a2 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,12 +1,12 @@
 
-MEMCPY_FN(__memcpy,
+MEMCPY_FN(memcpy_orig,
 	"x86-64-unrolled",
 	"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c,
+MEMCPY_FN(__memcpy,
 	"x86-64-movsq",
 	"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c_e,
+MEMCPY_FN(memcpy_erms,
 	"x86-64-movsb",
 	"movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index fcd9cf0..e4c2c30 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,8 +1,6 @@
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemcpy_c globl memcpy_c; memcpy_c
-#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db1d3a2..d3dfb79 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -36,7 +36,7 @@
 		    "Specify length of memory to copy. "
 		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
 	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to copy"),
+		    "Specify routine to copy, \"all\" runs all available routines"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memcpy() invocation this number of times"),
 	OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@
 	const char *const *usage;
 };
 
-static int bench_mem_common(int argc, const char **argv,
-		     const char *prefix __maybe_unused,
-		     struct bench_mem_info *info)
+static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
 {
-	int i;
-	size_t len;
-	double totallen;
+	const struct routine *r = &info->routines[r_idx];
 	double result_bps[2];
 	u64 result_cycle[2];
 
-	argc = parse_options(argc, argv, options,
-			     info->usage, 0);
-
-	if (no_prefault && only_prefault) {
-		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-		return 1;
-	}
-
-	if (use_cycle)
-		init_cycle();
-
-	len = (size_t)perf_atoll((char *)length_str);
-	totallen = (double)len * iterations;
-
 	result_cycle[0] = result_cycle[1] = 0ULL;
 	result_bps[0] = result_bps[1] = 0.0;
 
-	if ((s64)len <= 0) {
-		fprintf(stderr, "Invalid length:%s\n", length_str);
-		return 1;
-	}
-
-	/* same to without specifying either of prefault and no-prefault */
-	if (only_prefault && no_prefault)
-		only_prefault = no_prefault = false;
-
-	for (i = 0; info->routines[i].name; i++) {
-		if (!strcmp(info->routines[i].name, routine))
-			break;
-	}
-	if (!info->routines[i].name) {
-		printf("Unknown routine:%s\n", routine);
-		printf("Available routines...\n");
-		for (i = 0; info->routines[i].name; i++) {
-			printf("\t%s ... %s\n",
-			       info->routines[i].name, info->routines[i].desc);
-		}
-		return 1;
-	}
+	printf("Routine %s (%s)\n", r->name, r->desc);
 
 	if (bench_format == BENCH_FORMAT_DEFAULT)
 		printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@
 	if (!only_prefault && !no_prefault) {
 		/* show both of results */
 		if (use_cycle) {
-			result_cycle[0] =
-				info->do_cycle(&info->routines[i], len, false);
-			result_cycle[1] =
-				info->do_cycle(&info->routines[i], len, true);
+			result_cycle[0] = info->do_cycle(r, len, false);
+			result_cycle[1] = info->do_cycle(r, len, true);
 		} else {
-			result_bps[0] =
-				info->do_gettimeofday(&info->routines[i],
-						len, false);
-			result_bps[1] =
-				info->do_gettimeofday(&info->routines[i],
-						len, true);
+			result_bps[0]   = info->do_gettimeofday(r, len, false);
+			result_bps[1]   = info->do_gettimeofday(r, len, true);
 		}
 	} else {
-		if (use_cycle) {
-			result_cycle[pf] =
-				info->do_cycle(&info->routines[i],
-						len, only_prefault);
-		} else {
-			result_bps[pf] =
-				info->do_gettimeofday(&info->routines[i],
-						len, only_prefault);
-		}
+		if (use_cycle)
+			result_cycle[pf] = info->do_cycle(r, len, only_prefault);
+		else
+			result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
 	}
 
 	switch (bench_format) {
@@ -265,6 +215,60 @@
 		die("unknown format: %d\n", bench_format);
 		break;
 	}
+}
+
+static int bench_mem_common(int argc, const char **argv,
+		     const char *prefix __maybe_unused,
+		     struct bench_mem_info *info)
+{
+	int i;
+	size_t len;
+	double totallen;
+
+	argc = parse_options(argc, argv, options,
+			     info->usage, 0);
+
+	if (no_prefault && only_prefault) {
+		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+		return 1;
+	}
+
+	if (use_cycle)
+		init_cycle();
+
+	len = (size_t)perf_atoll((char *)length_str);
+	totallen = (double)len * iterations;
+
+	if ((s64)len <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	/* same to without specifying either of prefault and no-prefault */
+	if (only_prefault && no_prefault)
+		only_prefault = no_prefault = false;
+
+	if (!strncmp(routine, "all", 3)) {
+		for (i = 0; info->routines[i].name; i++)
+			__bench_mem_routine(info, i, len, totallen);
+		return 0;
+	}
+
+	for (i = 0; info->routines[i].name; i++) {
+		if (!strcmp(info->routines[i].name, routine))
+			break;
+	}
+	if (!info->routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; info->routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       info->routines[i].name, info->routines[i].desc);
+		}
+		return 1;
+	}
+
+	__bench_mem_routine(info, i, len, totallen);
 
 	return 0;
 }
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index a71dff9..f02d028 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,12 +1,12 @@
 
-MEMSET_FN(__memset,
+MEMSET_FN(memset_orig,
 	"x86-64-unrolled",
 	"unrolled memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c,
+MEMSET_FN(__memset,
 	"x86-64-stosq",
 	"movsq-based memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c_e,
+MEMSET_FN(memset_erms,
 	"x86-64-stosb",
 	"movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 9e5af89..de27878 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,8 +1,6 @@
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemset_c globl memset_c; memset_c
-#define Lmemset_c_e globl memset_c_e; memset_c_e
 #include "../../../arch/x86/lib/memset_64.S"
 
 /*
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
index 6789d788..3a3a0f1 100644
--- a/tools/perf/util/include/asm/alternative-asm.h
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -4,5 +4,6 @@
 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
 
 #define altinstruction_entry #
+#define ALTERNATIVE_2 #
 
 #endif
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 0db5713..95abddc 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -17,6 +17,7 @@
 TARGETS += timers
 TARGETS += user
 TARGETS += vm
+TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
 
 TARGETS_HOTPLUG = cpu-hotplug
@@ -55,7 +56,40 @@
 		make -C $$TARGET clean; \
 	done;
 
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+	@# Ask all targets to install their files
+	mkdir -p $(INSTALL_PATH)
+	for TARGET in $(TARGETS); do \
+		mkdir -p $(INSTALL_PATH)/$$TARGET ; \
+		make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+	done;
+
+	@# Ask all targets to emit their test scripts
+	echo "#!/bin/bash" > $(ALL_SCRIPT)
+	echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+
+	for TARGET in $(TARGETS); do \
+		echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+		echo "echo ========================================" >> $(ALL_SCRIPT); \
+		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+		make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+	done;
+
+	chmod u+x $(ALL_SCRIPT)
+else
+	$(error Error: set INSTALL_PATH to use install)
+endif
+
 clean:
 	for TARGET in $(TARGETS); do \
 		make -C $$TARGET clean; \
 	done;
+
+.PHONY: install
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index e18b42b..1822356 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -16,8 +16,9 @@
 	echo "Not an x86 target, can't build breakpoints selftests"
 endif
 
-run_tests:
-	@./breakpoint_test || echo "breakpoints selftests: [FAIL]"
+TEST_PROGS := breakpoint_test
+
+include ../lib.mk
 
 clean:
 	rm -fr breakpoint_test
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index e9c28d8..fe1f991 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -1,9 +1,10 @@
 all:
 
-run_tests:
-	@/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
 
 run_full_test:
-	@/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+	@/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
old mode 100644
new mode 100755
similarity index 100%
rename from tools/testing/selftests/cpu-hotplug/on-off-test.sh
rename to tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
index 29e8c6b..736c3dd 100644
--- a/tools/testing/selftests/efivarfs/Makefile
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -1,12 +1,13 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 
 test_objs = open-unlink create-read
 
 all: $(test_objs)
 
-run_tests: all
-	@/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
+TEST_PROGS := efivarfs.sh
+TEST_FILES := $(test_objs)
+
+include ../lib.mk
 
 clean:
 	rm -f $(test_objs)
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 66dfc2c..4edb7d0 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = execveat
 DEPS = execveat.symlink execveat.denatured script subdir
@@ -18,8 +17,12 @@
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-	./execveat
+TEST_PROGS := execveat
+TEST_FILES := $(DEPS)
+
+include ../lib.mk
+
+override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\""
 
 clean:
 	rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index e23cce0..9bf8223 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,25 +3,9 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-fw_filesystem:
-	@if /bin/sh ./fw_filesystem.sh ; then \
-                echo "fw_filesystem: ok"; \
-        else \
-                echo "fw_filesystem: [FAIL]"; \
-                exit 1; \
-        fi
+TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
 
-fw_userhelper:
-	@if /bin/sh ./fw_userhelper.sh ; then \
-                echo "fw_userhelper: ok"; \
-        else \
-                echo "fw_userhelper: [FAIL]"; \
-                exit 1; \
-        fi
-
-run_tests: all fw_filesystem fw_userhelper
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all clean run_tests fw_filesystem fw_userhelper
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 76cc9f1..3467206 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,7 +1,8 @@
 all:
 
-run_tests:
-	@/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
+TEST_PROGS := ftracetest
+
+include ../lib.mk
 
 clean:
 	rm -rf logs/*
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
index fd9c49a..aa51f6c 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
@@ -2,4 +2,4 @@
 # description: Basic event tracing check
 test -f available_events -a -f set_event -a -d events
 # check scheduler events are available
-grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file
+grep -q sched available_events && exit 0 || exit $FAIL
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
index 668616d..87eb9d6 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -9,7 +9,11 @@
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@
 do_reset
 
 echo 'sched:sched_switch' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -31,7 +36,8 @@
 do_reset
 
 echo 1 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -41,7 +47,8 @@
 do_reset
 
 echo 0 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -ne 0 ]; then
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index 655c415..ced27ef 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -9,7 +9,11 @@
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@
 do_reset
 
 echo 'sched:*' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -31,7 +36,8 @@
 do_reset
 
 echo 1 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -41,7 +47,8 @@
 do_reset
 
 echo 0 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -ne 0 ]; then
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
index 4808457..0bb5df3 100644
--- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -9,7 +9,11 @@
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
@@ -21,6 +25,9 @@
 do_reset
 
 echo '*:*' > set_event
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -29,6 +36,9 @@
 do_reset
 
 echo 1 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -37,6 +47,9 @@
 do_reset
 
 echo 0 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -ne 0 ]; then
     fail "any of events should not be recorded"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index c15e018..15c2dba 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -16,7 +16,9 @@
 
 do_reset() {
     reset_tracer
-    echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+	    echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    fi
     enable_tracing
     clear_trace
     echo > set_ftrace_filter
@@ -25,7 +27,7 @@
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
index 6af5f63..0ab2189 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -17,7 +17,7 @@
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
index 2e719cb..7808336 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -31,7 +31,7 @@
     reset_tracer
     echo > set_ftrace_filter
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 echo "Testing function tracer with profiler:"
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755
index 0000000..17d5bd0
--- /dev/null
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+# main
+main()
+{
+	if [ "$#" -eq 0 ]; then
+		echo "$0: Generating default compression gzip"
+		copts="cvzf"
+		ext=".tar.gz"
+	else
+		case "$1" in
+			tar)
+				copts="cvf"
+				ext=".tar"
+				;;
+			targz)
+				copts="cvzf"
+				ext=".tar.gz"
+				;;
+			tarbz2)
+				copts="cvjf"
+				ext=".tar.bz2"
+				;;
+			tarxz)
+				copts="cvJf"
+				ext=".tar.xz"
+				;;
+			*)
+			echo "Unknown tarball format $1"
+			exit 1
+			;;
+	esac
+	fi
+
+	install_dir=./kselftest
+
+# Run install using INSTALL_KSFT_PATH override to generate install
+# directory
+./kselftest_install.sh
+tar $copts kselftest${ext} $install_dir
+echo "Kselftest archive kselftest${ext} created!"
+
+# clean up install directory
+rm -rf kselftest
+}
+
+main "$@"
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
index 74bbefd..25d2e70 100644
--- a/tools/testing/selftests/ipc/Makefile
+++ b/tools/testing/selftests/ipc/Makefile
@@ -12,14 +12,11 @@
 CFLAGS += -I../../../../usr/include/
 
 all:
-ifeq ($(ARCH),x86)
-	gcc $(CFLAGS) msgque.c -o msgque_test
-else
-	echo "Not an x86 target, can't build msgque selftest"
-endif
+	$(CC) $(CFLAGS) msgque.c -o msgque_test
 
-run_tests: all
-	./msgque_test
+TEST_PROGS := msgque_test
+
+include ../lib.mk
 
 clean:
 	rm -fr ./msgque_test
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index ff0eefd..2ae7450 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -1,10 +1,10 @@
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS += -I../../../../usr/include/
 
 all: kcmp_test
 
-run_tests: all
-	@./kcmp_test || echo "kcmp_test: [FAIL]"
+TEST_PROGS := kcmp_test
+
+include ../lib.mk
 
 clean:
 	$(RM) kcmp_test kcmp-test-file
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755
index 0000000..1555fbd
--- /dev/null
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+install_loc=`pwd`
+
+main()
+{
+	if [ $(basename $install_loc) !=  "selftests" ]; then
+		echo "$0: Please run it in selftests directory ..."
+		exit 1;
+	fi
+	if [ "$#" -eq 0 ]; then
+		echo "$0: Installing in default location - $install_loc ..."
+	elif [ ! -d "$1" ]; then
+		echo "$0: $1 doesn't exist!!"
+		exit 1;
+	else
+		install_loc=$1
+		echo "$0: Installing in specified location - $install_loc ..."
+	fi
+
+	install_dir=$install_loc/kselftest
+
+# Create install directory
+	mkdir -p $install_dir
+# Build tests
+	INSTALL_PATH=$install_dir make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644
index 0000000..2194155
--- /dev/null
+++ b/tools/testing/selftests/lib.mk
@@ -0,0 +1,35 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+CC := $(CROSS_COMPILE)gcc
+
+define RUN_TESTS
+	@for TEST in $(TEST_PROGS); do \
+		(./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+	done;
+endef
+
+run_tests: all
+	$(RUN_TESTS)
+
+define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+endef
+
+install: all
+ifdef INSTALL_PATH
+	$(INSTALL_RULE)
+else
+	$(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+	@for TEST in $(TEST_PROGS); do \
+		echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+	done;
+endef
+
+emit_tests:
+	$(EMIT_TESTS)
+
+.PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index b80cd10..3e7eb79 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -1,17 +1,19 @@
+CC = $(CROSS_COMPILE)gcc
 CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
 
 all:
-	gcc $(CFLAGS) memfd_test.c -o memfd_test
+	$(CC) $(CFLAGS) memfd_test.c -o memfd_test
 
-run_tests: all
-	gcc $(CFLAGS) memfd_test.c -o memfd_test
-	@./memfd_test || echo "memfd_test: [FAIL]"
+TEST_PROGS := memfd_test
+
+include ../lib.mk
 
 build_fuse:
-	gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
-	gcc $(CFLAGS) fuse_test.c -o fuse_test
+	$(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+	$(CC) $(CFLAGS) fuse_test.c -o fuse_test
 
 run_fuse: build_fuse
 	@./run_fuse_test.sh || echo "fuse_test: [FAIL]"
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index d46b8d4..afb2624 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -1,9 +1,12 @@
 all:
 
-run_tests:
-	@/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]"
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 run_full_test:
-	@/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+	@/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
old mode 100644
new mode 100755
similarity index 100%
rename from tools/testing/selftests/memory-hotplug/on-off-test.sh
rename to tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644
index 0000000..856ad41
--- /dev/null
+++ b/tools/testing/selftests/mount/.gitignore
@@ -0,0 +1 @@
+unprivileged-remount-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 337d853..95580a9 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -1,17 +1,16 @@
 # Makefile for mount selftests.
-
+CFLAGS = -Wall \
+         -O2
 all: unprivileged-remount-test
 
 unprivileged-remount-test: unprivileged-remount-test.c
-	gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+	$(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
 
-# Allow specific tests to be selected.
-test_unprivileged_remount: unprivileged-remount-test
-	@if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+include ../lib.mk
 
-run_tests: all test_unprivileged_remount
+TEST_PROGS := unprivileged-remount-test
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 clean:
 	rm -f unprivileged-remount-test
-
-.PHONY: all test_unprivileged_remount
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 8056e2e..0e3b41e 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,10 +1,22 @@
-all:
-	gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
-	gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+CFLAGS = -O2
 
-run_tests:
-	@./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
-	@./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
+all:
+	$(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt
+	$(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+
+include ../lib.mk
+
+override define RUN_TESTS
+	@./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+	@./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+endef
+
+TEST_PROGS := mq_open_tests mq_perf_tests
+
+override define EMIT_TESTS
+	echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
+	echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
+endef
 
 clean:
 	rm -f mq_open_tests mq_perf_tests
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 62f22cc..fac4782 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,6 +1,5 @@
 # Makefile for net selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -O2 -g
 
 CFLAGS += -I../../../../usr/include/
@@ -11,9 +10,10 @@
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-	@/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
-	@/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
-	./test_bpf.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
+TEST_FILES := $(NET_PROGS)
+
+include ../lib.mk
+
 clean:
 	$(RM) $(NET_PROGS)
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 1d5e7ad..2958fe9a 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,10 +8,9 @@
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
-export CC CFLAGS
+export CFLAGS
 
 TARGETS = pmu copyloops mm tm primitives stringloops
 
@@ -22,10 +21,25 @@
 $(TARGETS):
 	$(MAKE) -k -C $@ all
 
-run_tests: all
+include ../lib.mk
+
+override define RUN_TESTS
 	@for TARGET in $(TARGETS); do \
 		$(MAKE) -C $$TARGET run_tests; \
 	done;
+endef
+
+override define INSTALL_RULE
+	@for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET install; \
+	done;
+endef
+
+override define EMIT_TESTS
+	@for TARGET in $(TARGETS); do \
+		$(MAKE) -s -C $$TARGET emit_tests; \
+	done;
+endef
 
 clean:
 	@for TARGET in $(TARGETS); do \
@@ -36,4 +50,4 @@
 tags:
 	find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: all run_tests clean tags $(TARGETS)
+.PHONY: tags $(TARGETS)
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 6f2d3be..c050235 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -6,24 +6,19 @@
 # Use our CFLAGS for the implicit .S rule
 ASFLAGS = $(CFLAGS)
 
-PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
 copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
 copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
 memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
 memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
 clean:
-	rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index a14c538..41cc3ed 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -1,21 +1,16 @@
 noarg:
 	$(MAKE) -C ../
 
-PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_PROGS := hugetlb_vs_thp_test subpage_prot
 
-all: $(PROGS) tempfile
+all: $(TEST_PROGS) tempfile
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
 tempfile:
 	dd if=/dev/zero of=tempfile bs=64k count=1
 
 clean:
-	rm -f $(PROGS) tempfile
-
-.PHONY: all run_tests clean
+	rm -f $(TEST_PROGS) tempfile
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index c9f4263..5a16117 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -1,38 +1,42 @@
 noarg:
 	$(MAKE) -C ../
 
-PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-SUB_TARGETS = ebb
+all: $(TEST_PROGS) ebb
 
-all: $(PROGS) $(SUB_TARGETS)
-
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
 	$(CC) $(CFLAGS) -m64 -o $@ $^
 
-run_tests: all sub_run_tests
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
-clean: sub_clean
-	rm -f $(PROGS) loop.o
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+	$(DEFAULT_RUN_TESTS)
+	$(MAKE) -C ebb run_tests
+endef
 
-$(SUB_TARGETS):
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+	$(DEFAULT_EMIT_TESTS)
+	$(MAKE) -s -C ebb emit_tests
+endef
+
+DEFAULT_INSTALL := $(INSTALL_RULE)
+override define INSTALL_RULE
+	$(DEFAULT_INSTALL_RULE)
+	$(MAKE) -C ebb install
+endef
+
+clean:
+	rm -f $(TEST_PROGS) loop.o
+	$(MAKE) -C ebb clean
+
+ebb:
 	$(MAKE) -k -C $@ all
 
-sub_run_tests: all
-	@for TARGET in $(SUB_TARGETS); do \
-		$(MAKE) -C $$TARGET run_tests; \
-	done;
-
-sub_clean:
-	@for TARGET in $(SUB_TARGETS); do \
-		$(MAKE) -C $$TARGET clean; \
-	done;
-
-.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
+.PHONY: all run_tests clean ebb
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 3dc4332..5cdc9db 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -4,7 +4,7 @@
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
-PROGS := reg_access_test event_attributes_test cycles_test	\
+TEST_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
 	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
 	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
@@ -16,18 +16,15 @@
 	 lost_exception_test no_handler_test			\
 	 cycles_with_mmcr2_test
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
 lost_exception_test: ../lib.c
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../../lib.mk
 
 clean:
-	rm -f $(PROGS)
+	rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index ea737ca..b68c622 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -1,17 +1,12 @@
 CFLAGS += -I$(CURDIR)
 
-PROGS := load_unaligned_zeropad
+TEST_PROGS := load_unaligned_zeropad
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
 clean:
-	rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 506d773..2a728f4 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -2,19 +2,14 @@
 CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-PROGS := memcmp
+TEST_PROGS := memcmp
 EXTRA_SOURCES := memcmp_64.S ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
 clean:
-	rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 2cede23..34f2ec63 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,15 +1,10 @@
-PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-	@-for PROG in $(PROGS); do \
-		./$$PROG; \
-	done;
+include ../../lib.mk
 
 clean:
-	rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 47ae2d3..453927f 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -6,5 +6,6 @@
 clean:
 	rm -f peeksiginfo
 
-run_tests: all
-	@./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"
+TEST_PROGS := peeksiginfo
+
+include ../lib.mk
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
index 04dc25e..bbd0b53 100644
--- a/tools/testing/selftests/size/Makefile
+++ b/tools/testing/selftests/size/Makefile
@@ -1,12 +1,11 @@
-CC = $(CROSS_COMPILE)gcc
-
 all: get_size
 
 get_size: get_size.c
 	$(CC) -static -ffreestanding -nostartfiles -s $< -o $@
 
-run_tests: all
-	./get_size
+TEST_PROGS := get_size
+
+include ../lib.mk
 
 clean:
 	$(RM) get_size
diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
index 0a92ada..b3c33e0 100644
--- a/tools/testing/selftests/sysctl/Makefile
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -4,16 +4,10 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
 all:
 
-# Allow specific tests to be selected.
-test_num:
-	@/bin/sh ./run_numerictests
+TEST_PROGS := run_numerictests run_stringtests
+TEST_FILES := common_tests
 
-test_string:
-	@/bin/sh ./run_stringtests
-
-run_tests: all test_num test_string
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all run_tests clean test_num test_string
diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index eb2859f..89a3f44 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -1,8 +1,36 @@
-all:
-	gcc posix_timers.c -o posix_timers -lrt
+CC = $(CROSS_COMPILE)gcc
+BUILD_FLAGS = -DKTEST
+CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+LDFLAGS += -lrt -lpthread
 
-run_tests: all
-	./posix_timers
+# these are all "safe" tests that don't modify
+# system time or require escalated privledges
+TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+	     inconsistency-check raw_skew threadtest rtctest
+
+TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \
+		      skew_consistency clocksource-switch leap-a-day \
+		      leapcrash set-tai set-2038
+
+bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
+
+all: ${bins}
+
+include ../lib.mk
+
+# these tests require escalated privledges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+	./alarmtimer-suspend
+	./valid-adjtimex
+	./change_skew
+	./skew_consistency
+	./clocksource-switch
+	./leap-a-day -s -i 10
+	./leapcrash
+	./set-tai
+	./set-2038
 
 clean:
-	rm -f ./posix_timers
+	rm -f ${bins}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644
index 0000000..aaffbde
--- /dev/null
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -0,0 +1,185 @@
+/* alarmtimer suspend test
+ *		John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the alarmtimer & RTC wakeup code is
+ *   functioning.
+ *
+ *  To build:
+ *	$ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+	long long delta_ns;
+	struct timespec ts;
+
+	clock_gettime(alarm_clock_id, &ts);
+	alarmcount++;
+
+	delta_ns = timespec_sub(start_time, ts);
+	delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+	printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+							ts.tv_nsec, delta_ns);
+
+	if (delta_ns > UNREASONABLE_LAT) {
+		printf("[FAIL]\n");
+		final_ret = -1;
+	} else
+		printf("[OK]\n");
+
+}
+
+int main(void)
+{
+	timer_t tm1;
+	struct itimerspec its1, its2;
+	struct sigevent se;
+	struct sigaction act;
+	int signum = SIGRTMAX;
+
+	/* Set up signal handler: */
+	sigfillset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = sigalarm;
+	sigaction(signum, &act, NULL);
+
+	/* Set up timer: */
+	memset(&se, 0, sizeof(se));
+	se.sigev_notify = SIGEV_SIGNAL;
+	se.sigev_signo = signum;
+	se.sigev_value.sival_int = 0;
+
+	for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+			alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+			alarm_clock_id++) {
+
+		alarmcount = 0;
+		timer_create(alarm_clock_id, &se, &tm1);
+
+		clock_gettime(alarm_clock_id, &start_time);
+		printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+				start_time.tv_sec, start_time.tv_nsec);
+		printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+		its1.it_value = start_time;
+		its1.it_value.tv_sec += SUSPEND_SECS;
+		its1.it_interval.tv_sec = SUSPEND_SECS;
+		its1.it_interval.tv_nsec = 0;
+
+		timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+		while (alarmcount < 5)
+			sleep(1); /* First 5 alarms, do nothing */
+
+		printf("Starting suspend loops\n");
+		while (alarmcount < 10) {
+			int ret;
+
+			sleep(1);
+			ret = system("echo mem > /sys/power/state");
+			if (ret)
+				break;
+		}
+		timer_delete(tm1);
+	}
+	if (final_ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644
index 0000000..cb19689
--- /dev/null
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -0,0 +1,107 @@
+/* ADJ_FREQ Skew change test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the raw_skew, inconsistency-check and nanosleep tests be
+ *  present in the same directory it is run from.
+ *
+ *  To build:
+ *	$ gcc change_skew.c -o change_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+	struct timex tx;
+	int ret;
+
+	tx.modes = ADJ_FREQUENCY;
+	tx.freq = ppm << 16;
+
+	ret = adjtimex(&tx);
+	if (ret < 0) {
+		printf("Error adjusting freq\n");
+		return ret;
+	}
+
+	ret = system("./raw_skew");
+	ret |= system("./inconsistency-check");
+	ret |= system("./nanosleep");
+
+	return ret;
+}
+
+
+int main(int argv, char **argc)
+{
+	struct timex tx;
+	int i, ret;
+
+	int ppm[5] = {0, 250, 500, -250, -500};
+
+	/* Kill ntpd */
+	ret = system("killall -9 ntpd");
+
+	/* Make sure there's no offset adjustment going on */
+	tx.modes = ADJ_OFFSET;
+	tx.offset = 0;
+	ret = adjtimex(&tx);
+
+	if (ret < 0) {
+		printf("Maybe you're not running as root?\n");
+		return -1;
+	}
+
+	for (i = 0; i < 5; i++) {
+		printf("Using %i ppm adjustment\n", ppm[i]);
+		ret = change_skew_test(ppm[i]);
+		if (ret)
+			break;
+	}
+
+	/* Set things back */
+	tx.modes = ADJ_FREQUENCY;
+	tx.offset = 0;
+	adjtimex(&tx);
+
+	if (ret) {
+		printf("[FAIL]");
+		return ksft_exit_fail();
+	}
+	printf("[OK]");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644
index 0000000..627ec74
--- /dev/null
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -0,0 +1,179 @@
+/* Clocksource change test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which quickly changes the clocksourc and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the inconsistency-check and nanosleep tests be present in the
+ *  same directory it is run from.
+ *
+ *  To build:
+ *	$ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+
+int get_clocksources(char list[][30])
+{
+	int fd, i;
+	size_t size;
+	char buf[512];
+	char *head, *tmp;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+	size = read(fd, buf, 512);
+
+	close(fd);
+
+	for (i = 0; i < 30; i++)
+		list[i][0] = '\0';
+
+	head = buf;
+	i = 0;
+	while (head - buf < size) {
+		/* Find the next space */
+		for (tmp = head; *tmp != ' '; tmp++) {
+			if (*tmp == '\n')
+				break;
+			if (*tmp == '\0')
+				break;
+		}
+		*tmp = '\0';
+		strcpy(list[i], head);
+		head = tmp + 1;
+		i++;
+	}
+
+	return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+	int fd;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+	size = read(fd, buf, size);
+
+	return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+	int fd;
+	size_t size;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+	if (fd < 0)
+		return -1;
+
+	size = write(fd, clocksource, strlen(clocksource));
+
+	if (size < 0)
+		return -1;
+
+	close(fd);
+	return 0;
+}
+
+
+int run_tests(int secs)
+{
+	int ret;
+	char buf[255];
+
+	sprintf(buf, "./inconsistency-check -t %i", secs);
+	ret = system(buf);
+	if (ret)
+		return ret;
+	ret = system("./nanosleep");
+	return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argv, char **argc)
+{
+	char orig_clk[512];
+	int count, i, status;
+	pid_t pid;
+
+	get_cur_clocksource(orig_clk, 512);
+
+	count = get_clocksources(clocksource_list);
+
+	if (change_clocksource(clocksource_list[0])) {
+		printf("Error: You probably need to run this as root\n");
+		return -1;
+	}
+
+	/* Check everything is sane before we start switching asyncrhonously */
+	for (i = 0; i < count; i++) {
+		printf("Validating clocksource %s\n", clocksource_list[i]);
+		if (change_clocksource(clocksource_list[i])) {
+			status = -1;
+			goto out;
+		}
+		if (run_tests(5)) {
+			status = -1;
+			goto out;
+		}
+	}
+
+
+	printf("Running Asyncrhonous Switching Tests...\n");
+	pid = fork();
+	if (!pid)
+		return run_tests(60);
+
+	while (pid != waitpid(pid, &status, WNOHANG))
+		for (i = 0; i < count; i++)
+			if (change_clocksource(clocksource_list[i])) {
+				status = -1;
+				goto out;
+			}
+out:
+	change_clocksource(orig_clk);
+
+	if (status)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644
index 0000000..caf1bc9
--- /dev/null
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -0,0 +1,204 @@
+/* Time inconsistency check test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2003, 2004, 2005, 2012
+ *		(C) Copyright Linaro Limited 2015
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	/* use unsigned to avoid false positives on 2038 rollover */
+	if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+		return 1;
+	if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+	struct timespec list[CALLS_PER_LOOP];
+	int i, inconsistent;
+	long now, then;
+	time_t t;
+	char *start_str;
+
+	clock_gettime(clock_type, &list[0]);
+	now = then = list[0].tv_sec;
+
+	/* timestamp start of test */
+	t = time(0);
+	start_str = ctime(&t);
+
+	while (seconds == -1 || now - then < seconds) {
+		inconsistent = 0;
+
+		/* Fill list */
+		for (i = 0; i < CALLS_PER_LOOP; i++)
+			clock_gettime(clock_type, &list[i]);
+
+		/* Check for inconsistencies */
+		for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+			if (!in_order(list[i], list[i+1]))
+				inconsistent = i;
+
+		/* display inconsistency */
+		if (inconsistent) {
+			unsigned long long delta;
+
+			printf("\%s\n", start_str);
+			for (i = 0; i < CALLS_PER_LOOP; i++) {
+				if (i == inconsistent)
+					printf("--------------------\n");
+				printf("%lu:%lu\n", list[i].tv_sec,
+							list[i].tv_nsec);
+				if (i == inconsistent + 1)
+					printf("--------------------\n");
+			}
+			delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+			delta += list[inconsistent].tv_nsec;
+			delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+			delta -= list[inconsistent+1].tv_nsec;
+			printf("Delta: %llu ns\n", delta);
+			fflush(0);
+			/* timestamp inconsistency*/
+			t = time(0);
+			printf("%s\n", ctime(&t));
+			printf("[FAILED]\n");
+			return -1;
+		}
+		now = list[0].tv_sec;
+	}
+	printf("[OK]\n");
+	return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+	int clockid, opt;
+	int userclock = CLOCK_REALTIME;
+	int maxclocks = NR_CLOCKIDS;
+	int runtime = 10;
+	struct timespec ts;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+		switch (opt) {
+		case 't':
+			runtime = atoi(optarg);
+			break;
+		case 'c':
+			userclock = atoi(optarg);
+			maxclocks = userclock + 1;
+			break;
+		default:
+			printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+			printf("	-t: Number of seconds to run\n");
+			printf("	-c: clockid to use (default, all clockids)\n");
+			exit(-1);
+		}
+	}
+
+	setbuf(stdout, NULL);
+
+	for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+		if (clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		if (!clock_gettime(clockid, &ts)) {
+			printf("Consistent %-30s ", clockstring(clockid));
+			if (consistency_test(clockid, runtime))
+				return ksft_exit_fail();
+		}
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644
index 0000000..b8272e6
--- /dev/null
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -0,0 +1,319 @@
+/* Leap second stress test
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPLv2
+ *
+ *  This test signals the kernel to insert a leap second
+ *  every day at midnight GMT. This allows for stessing the
+ *  kernel's leap-second behavior, as well as how well applications
+ *  handle the leap-second discontinuity.
+ *
+ *  Usage: leap-a-day [-s] [-i <num>]
+ *
+ *  Options:
+ *	-s:	Each iteration, set the date to 10 seconds before midnight GMT.
+ *		This speeds up the number of leapsecond transitions tested,
+ *		but because it calls settimeofday frequently, advancing the
+ *		time by 24 hours every ~16 seconds, it may cause application
+ *		disruption.
+ *
+ *	-i:	Number of iterations to run (default: infinite)
+ *
+ *  Other notes: Disabling NTP prior to running this is advised, as the two
+ *		 may conflict in their commands to the kernel.
+ *
+ *  To build:
+ *	$ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	if (a.tv_sec < b.tv_sec)
+		return 1;
+	if (a.tv_sec > b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+char *time_state_str(int state)
+{
+	switch (state) {
+	case TIME_OK:	return "TIME_OK";
+	case TIME_INS:	return "TIME_INS";
+	case TIME_DEL:	return "TIME_DEL";
+	case TIME_OOP:	return "TIME_OOP";
+	case TIME_WAIT:	return "TIME_WAIT";
+	case TIME_BAD:	return "TIME_BAD";
+	}
+	return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	/*
+	 * We have to call adjtime twice here, as kernels
+	 * prior to 6b1859dba01c7 (included in 3.5 and
+	 * -stable), had an issue with the state machine
+	 * and wouldn't clear the STA_INS/DEL flag directly.
+	 */
+	tx.modes = ADJ_STATUS;
+	tx.status = STA_PLL;
+	ret = adjtimex(&tx);
+
+	/* Clear maxerror, as it can cause UNSYNC to be set */
+	tx.modes = ADJ_MAXERROR;
+	tx.maxerror = 0;
+	ret = adjtimex(&tx);
+
+	/* Clear the status */
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+
+	return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+	clear_time_state();
+	exit(0);
+}
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+	struct timespec now, target;
+
+	clock_gettime(CLOCK_REALTIME, &now);
+	target = timespec_add(now, NSEC_PER_SEC/2);
+	clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+	clock_gettime(CLOCK_REALTIME, &now);
+
+	if (!in_order(target, now))
+		printf("ERROR: hrtimer early expiration failure observed.\n");
+}
+
+int main(int argc, char **argv)
+{
+	int settime = 0;
+	int tai_time = 0;
+	int insert = 1;
+	int iterations = -1;
+	int opt;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "sti:")) != -1) {
+		switch (opt) {
+		case 's':
+			printf("Setting time to speed up testing\n");
+			settime = 1;
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			break;
+		case 't':
+			tai_time = 1;
+			break;
+		default:
+			printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]);
+			printf("	-s: Set time to right before leap second each iteration\n");
+			printf("	-i: Number of iterations\n");
+			printf("	-t: Print TAI time\n");
+			exit(-1);
+		}
+	}
+
+	/* Make sure TAI support is present if -t was used */
+	if (tai_time) {
+		struct timespec ts;
+
+		if (clock_gettime(CLOCK_TAI, &ts)) {
+			printf("System doesn't support CLOCK_TAI\n");
+			ksft_exit_fail();
+		}
+	}
+
+	signal(SIGINT, handler);
+	signal(SIGKILL, handler);
+
+	if (iterations < 0)
+		printf("This runs continuously. Press ctrl-c to stop\n");
+	else
+		printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+	printf("\n");
+	while (1) {
+		int ret;
+		struct timespec ts;
+		struct timex tx;
+		time_t now, next_leap;
+
+		/* Get the current time */
+		clock_gettime(CLOCK_REALTIME, &ts);
+
+		/* Calculate the next possible leap second 23:59:60 GMT */
+		next_leap = ts.tv_sec;
+		next_leap += 86400 - (next_leap % 86400);
+
+		if (settime) {
+			struct timeval tv;
+
+			tv.tv_sec = next_leap - 10;
+			tv.tv_usec = 0;
+			settimeofday(&tv, NULL);
+			printf("Setting time to %s", ctime(&tv.tv_sec));
+		}
+
+		/* Reset NTP time state */
+		clear_time_state();
+
+		/* Set the leap second insert flag */
+		tx.modes = ADJ_STATUS;
+		if (insert)
+			tx.status = STA_INS;
+		else
+			tx.status = STA_DEL;
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+							time_state_str(ret));
+			return ksft_exit_fail();
+		}
+
+		/* Validate STA_INS was set */
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.status != STA_INS && tx.status != STA_DEL) {
+			printf("Error: STA_INS/STA_DEL not set!: %s\n",
+							time_state_str(ret));
+			return ksft_exit_fail();
+		}
+
+		if (tai_time) {
+			printf("Using TAI time,"
+				" no inconsistencies should be seen!\n");
+		}
+
+		printf("Scheduling leap second for %s", ctime(&next_leap));
+
+		/* Wake up 3 seconds before leap */
+		ts.tv_sec = next_leap - 3;
+		ts.tv_nsec = 0;
+
+		while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+			printf("Something woke us up, returning to sleep\n");
+
+		/* Validate STA_INS is still set */
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.status != STA_INS && tx.status != STA_DEL) {
+			printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+			tx.modes = ADJ_STATUS;
+			if (insert)
+				tx.status = STA_INS;
+			else
+				tx.status = STA_DEL;
+			ret = adjtimex(&tx);
+		}
+
+		/* Check adjtimex output every half second */
+		now = tx.time.tv_sec;
+		while (now < next_leap + 2) {
+			char buf[26];
+			struct timespec tai;
+
+			tx.modes = 0;
+			ret = adjtimex(&tx);
+
+			if (tai_time) {
+				clock_gettime(CLOCK_TAI, &tai);
+				printf("%ld sec, %9ld ns\t%s\n",
+						tai.tv_sec,
+						tai.tv_nsec,
+						time_state_str(ret));
+			} else {
+				ctime_r(&tx.time.tv_sec, buf);
+				buf[strlen(buf)-1] = 0; /*remove trailing\n */
+
+				printf("%s + %6ld us (%i)\t%s\n",
+						buf,
+						tx.time.tv_usec,
+						tx.tai,
+						time_state_str(ret));
+			}
+			now = tx.time.tv_sec;
+			/* Sleep for another half second */
+			ts.tv_sec = 0;
+			ts.tv_nsec = NSEC_PER_SEC / 2;
+			clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+		}
+		/* Switch to using other mode */
+		insert = !insert;
+
+		/* Note if kernel has known hrtimer failure */
+		test_hrtimer_failure();
+
+		printf("Leap complete\n\n");
+
+		if ((iterations != -1) && !(--iterations))
+			break;
+	}
+
+	clear_time_state();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644
index 0000000..a1071bd
--- /dev/null
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -0,0 +1,120 @@
+/* Demo leapsecond deadlock
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possibe
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ *  To build:
+ *	$ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	/*
+	 * We have to call adjtime twice here, as kernels
+	 * prior to 6b1859dba01c7 (included in 3.5 and
+	 * -stable), had an issue with the state machine
+	 * and wouldn't clear the STA_INS/DEL flag directly.
+	 */
+	tx.modes = ADJ_STATUS;
+	tx.status = STA_PLL;
+	ret = adjtimex(&tx);
+
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+
+	return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+	clear_time_state();
+	exit(0);
+}
+
+
+int main(void)
+{
+	struct timex tx;
+	struct timespec ts;
+	time_t next_leap;
+	int count = 0;
+
+	setbuf(stdout, NULL);
+
+	signal(SIGINT, handler);
+	signal(SIGKILL, handler);
+	printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+	clear_time_state();
+
+
+	/* Get the current time */
+	clock_gettime(CLOCK_REALTIME, &ts);
+
+	/* Calculate the next possible leap second 23:59:60 GMT */
+	next_leap = ts.tv_sec;
+	next_leap += 86400 - (next_leap % 86400);
+
+	for (count = 0; count < 20; count++) {
+		struct timeval tv;
+
+
+		/* set the time to 2 seconds before the leap */
+		tv.tv_sec = next_leap - 2;
+		tv.tv_usec = 0;
+		if (settimeofday(&tv, NULL)) {
+			printf("Error: You're likely not running with proper (ie: root) permissions\n");
+			return ksft_exit_fail();
+		}
+		tx.modes = 0;
+		adjtimex(&tx);
+
+		/* hammer on adjtime w/ STA_INS */
+		while (tx.time.tv_sec < next_leap + 1) {
+			/* Set the leap second insert flag */
+			tx.modes = ADJ_STATUS;
+			tx.status = STA_INS;
+			adjtimex(&tx);
+		}
+		clear_time_state();
+		printf(".");
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644
index 0000000..a2a3924
--- /dev/null
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -0,0 +1,124 @@
+/* Measure mqueue timeout latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *		(C) Copyright Linaro 2013
+ *
+ *		Inspired with permission from example test by:
+ *			Romain Francoise <romain@orebokech.com>
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT		100000000	/* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY	40000000	/* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+	mqd_t q;
+	struct mq_attr attr;
+	struct timespec start, end, now, target;
+	int i, count, ret;
+
+	q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+	if (q < 0) {
+		perror("mq_open");
+		return -1;
+	}
+	mq_getattr(q, &attr);
+
+
+	count = 100;
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (i = 0; i < count; i++) {
+		char buf[attr.mq_msgsize];
+
+		clock_gettime(CLOCK_REALTIME, &now);
+		target = now;
+		target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+		ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+		if (ret < 0 && errno != ETIMEDOUT) {
+			perror("mq_timedreceive");
+			return -1;
+		}
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	mq_close(q);
+
+	if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+		return -1;
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	printf("Mqueue latency :                          ");
+
+	ret = mqueue_lat_test();
+	if (ret < 0) {
+		printf("[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644
index 0000000..8a3c29d
--- /dev/null
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -0,0 +1,174 @@
+/* Make sure timers don't return early
+ *              by: john stultz (johnstul@us.ibm.com)
+ *		    John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright Linaro 2013 2015
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc nanosleep.c -o nanosleep -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	if (a.tv_sec < b.tv_sec)
+		return 1;
+	if (a.tv_sec > b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+	struct timespec now, target, rel;
+
+	/* First check abs time */
+	if (clock_gettime(clockid, &now))
+		return UNSUPPORTED;
+	target = timespec_add(now, ns);
+
+	if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+		return UNSUPPORTED;
+	clock_gettime(clockid, &now);
+
+	if (!in_order(target, now))
+		return -1;
+
+	/* Second check reltime */
+	clock_gettime(clockid, &now);
+	rel.tv_sec = 0;
+	rel.tv_nsec = 0;
+	rel = timespec_add(rel, ns);
+	target = timespec_add(now, ns);
+	clock_nanosleep(clockid, 0, &rel, NULL);
+	clock_gettime(clockid, &now);
+
+	if (!in_order(target, now))
+		return -1;
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	long long length;
+	int clockid, ret;
+
+	for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+		/* Skip cputime clockids since nanosleep won't increment cputime */
+		if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+				clockid == CLOCK_THREAD_CPUTIME_ID ||
+				clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		printf("Nanosleep %-31s ", clockstring(clockid));
+
+		length = 10;
+		while (length <= (NSEC_PER_SEC * 10)) {
+			ret = nanosleep_test(clockid, length);
+			if (ret == UNSUPPORTED) {
+				printf("[UNSUPPORTED]\n");
+				goto next;
+			}
+			if (ret < 0) {
+				printf("[FAILED]\n");
+				return ksft_exit_fail();
+			}
+			length *= 100;
+		}
+		printf("[OK]\n");
+next:
+		ret = 0;
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644
index 0000000..2d7898f
--- /dev/null
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -0,0 +1,190 @@
+/* Measure nanosleep timer latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *		(C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+	struct timespec start, end, target;
+	long long latency = 0;
+	int i, count;
+
+	target.tv_sec = ns/NSEC_PER_SEC;
+	target.tv_nsec = ns%NSEC_PER_SEC;
+
+	if (clock_gettime(clockid, &start))
+		return UNSUPPORTED;
+	if (clock_nanosleep(clockid, 0, &target, NULL))
+		return UNSUPPORTED;
+
+	count = 10;
+
+	/* First check relative latency */
+	clock_gettime(clockid, &start);
+	for (i = 0; i < count; i++)
+		clock_nanosleep(clockid, 0, &target, NULL);
+	clock_gettime(clockid, &end);
+
+	if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
+		printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+		return -1;
+	}
+
+	/* Next check absolute latency */
+	for (i = 0; i < count; i++) {
+		clock_gettime(clockid, &start);
+		target = timespec_add(start, ns);
+		clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+		clock_gettime(clockid, &end);
+		latency += timespec_sub(target, end);
+	}
+
+	if (latency/count > UNRESONABLE_LATENCY) {
+		printf("Large abs latency: %lld ns :", latency/count);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+	long long length;
+	int clockid, ret;
+
+	for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+		/* Skip cputime clockids since nanosleep won't increment cputime */
+		if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+				clockid == CLOCK_THREAD_CPUTIME_ID ||
+				clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		printf("nsleep latency %-26s ", clockstring(clockid));
+
+		length = 10;
+		while (length <= (NSEC_PER_SEC * 10)) {
+			ret = nanosleep_lat_test(clockid, length);
+			if (ret)
+				break;
+			length *= 100;
+
+		}
+
+		if (ret == UNSUPPORTED) {
+			printf("[UNSUPPORTED]\n");
+			continue;
+		}
+		if (ret < 0) {
+			printf("[FAILED]\n");
+			return ksft_exit_fail();
+		}
+		printf("[OK]\n");
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index f87d970..5a246a0 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -35,10 +35,11 @@
 static void kernel_loop(void)
 {
 	void *addr = sbrk(0);
+	int err = 0;
 
-	while (!done) {
-		brk(addr + 4096);
-		brk(addr);
+	while (!done && !err) {
+		err = brk(addr + 4096);
+		err |= brk(addr);
 	}
 }
 
@@ -190,8 +191,6 @@
 
 int main(int argc, char **argv)
 {
-	int err;
-
 	printf("Testing posix timers. False negative may happen on CPU execution \n");
 	printf("based timers if other threads run on the CPU...\n");
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644
index 0000000..30906bf
--- /dev/null
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -0,0 +1,154 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		    John Stultz <john.stultz@linaro.org>
+ *		(C) Copyright IBM 2012
+ *		(C) Copyright Linaro Limited 2015
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc raw_skew.c -o raw_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+
+#define CLOCK_MONOTONIC_RAW		4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({		\
+	__typeof__(x) __x = (x);	\
+	__typeof__(s) __s = (s);	\
+	__x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+long long llabs(long long val)
+{
+	if (val < 0)
+		val = -val;
+	return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+	struct timespec ts;
+
+	ts.tv_sec = ns/NSEC_PER_SEC;
+	ts.tv_nsec = ns%NSEC_PER_SEC;
+	return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+	long long start_ns, end_ns;
+
+	start_ns = ts_to_nsec(start);
+	end_ns = ts_to_nsec(end);
+	return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+	struct timespec start, mid, end;
+	long long diff = 0, tmp;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		long long newdiff;
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+
+		newdiff = diff_timespec(start, end);
+		if (diff == 0 || newdiff < diff) {
+			diff = newdiff;
+			*raw = mid;
+			tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+			*mon = nsec_to_ts(tmp);
+		}
+	}
+}
+
+int main(int argv, char **argc)
+{
+	struct timespec mon, raw, start, end;
+	long long delta1, delta2, interval, eppm, ppm;
+	struct timex tx1, tx2;
+
+	setbuf(stdout, NULL);
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+		printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+		return -1;
+	}
+
+	tx1.modes = 0;
+	adjtimex(&tx1);
+	get_monotonic_and_raw(&mon, &raw);
+	start = mon;
+	delta1 = diff_timespec(mon, raw);
+
+	if (tx1.offset)
+		printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+	printf("Estimating clock drift: ");
+	sleep(120);
+
+	get_monotonic_and_raw(&mon, &raw);
+	end = mon;
+	tx2.modes = 0;
+	adjtimex(&tx2);
+	delta2 = diff_timespec(mon, raw);
+
+	interval = diff_timespec(start, end);
+
+	/* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+	eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+	eppm = -eppm;
+	printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+	/* Avg the two actual freq samples adjtimex gave us */
+	ppm = (tx1.freq + tx2.freq) * 1000 / 2;
+	ppm = (long long)tx1.freq * 1000;
+	ppm = shift_right(ppm, 16);
+	printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+	if (llabs(eppm - ppm) > 1000) {
+		printf("	[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("	[OK]\n");
+	return  ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
new file mode 100644
index 0000000..d80ae85
--- /dev/null
+++ b/tools/testing/selftests/timers/rtctest.c
@@ -0,0 +1,271 @@
+/*
+ *      Real Time Clock Driver Test/Example Program
+ *
+ *      Compile with:
+ *		     gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
+ *
+ *      Copyright (C) 1996, Paul Gortmaker.
+ *
+ *      Released under the GNU General Public License, version 2,
+ *      included herein by reference.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+
+int main(int argc, char **argv)
+{
+	int i, fd, retval, irqcount = 0;
+	unsigned long tmp, data;
+	struct rtc_time rtc_tm;
+	const char *rtc = default_rtc;
+	struct timeval start, end, diff;
+
+	switch (argc) {
+	case 2:
+		rtc = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage:  rtctest [rtcdev]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+
+	if (fd ==  -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
+
+	/* Turn on update interrupts (one per second) */
+	retval = ioctl(fd, RTC_UIE_ON, 0);
+	if (retval == -1) {
+		if (errno == ENOTTY) {
+			fprintf(stderr,
+				"\n...Update IRQs not supported.\n");
+			goto test_READ;
+		}
+		perror("RTC_UIE_ON ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
+			rtc);
+	fflush(stderr);
+	for (i=1; i<6; i++) {
+		/* This read will block */
+		retval = read(fd, &data, sizeof(unsigned long));
+		if (retval == -1) {
+			perror("read");
+			exit(errno);
+		}
+		fprintf(stderr, " %d",i);
+		fflush(stderr);
+		irqcount++;
+	}
+
+	fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
+	fflush(stderr);
+	for (i=1; i<6; i++) {
+		struct timeval tv = {5, 0};     /* 5 second timeout on select */
+		fd_set readfds;
+
+		FD_ZERO(&readfds);
+		FD_SET(fd, &readfds);
+		/* The select will wait until an RTC interrupt happens. */
+		retval = select(fd+1, &readfds, NULL, NULL, &tv);
+		if (retval == -1) {
+		        perror("select");
+		        exit(errno);
+		}
+		/* This read won't block unlike the select-less case above. */
+		retval = read(fd, &data, sizeof(unsigned long));
+		if (retval == -1) {
+		        perror("read");
+		        exit(errno);
+		}
+		fprintf(stderr, " %d",i);
+		fflush(stderr);
+		irqcount++;
+	}
+
+	/* Turn off update interrupts */
+	retval = ioctl(fd, RTC_UIE_OFF, 0);
+	if (retval == -1) {
+		perror("RTC_UIE_OFF ioctl");
+		exit(errno);
+	}
+
+test_READ:
+	/* Read the RTC time/date */
+	retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
+	if (retval == -1) {
+		perror("RTC_RD_TIME ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+		rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+	/* Set the alarm to 5 sec in the future, and check for rollover */
+	rtc_tm.tm_sec += 5;
+	if (rtc_tm.tm_sec >= 60) {
+		rtc_tm.tm_sec %= 60;
+		rtc_tm.tm_min++;
+	}
+	if (rtc_tm.tm_min == 60) {
+		rtc_tm.tm_min = 0;
+		rtc_tm.tm_hour++;
+	}
+	if (rtc_tm.tm_hour == 24)
+		rtc_tm.tm_hour = 0;
+
+	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
+	if (retval == -1) {
+		if (errno == ENOTTY) {
+			fprintf(stderr,
+				"\n...Alarm IRQs not supported.\n");
+			goto test_PIE;
+		}
+		perror("RTC_ALM_SET ioctl");
+		exit(errno);
+	}
+
+	/* Read the current alarm settings */
+	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
+	if (retval == -1) {
+		perror("RTC_ALM_READ ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
+		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+	/* Enable alarm interrupts */
+	retval = ioctl(fd, RTC_AIE_ON, 0);
+	if (retval == -1) {
+		perror("RTC_AIE_ON ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "Waiting 5 seconds for alarm...");
+	fflush(stderr);
+	/* This blocks until the alarm ring causes an interrupt */
+	retval = read(fd, &data, sizeof(unsigned long));
+	if (retval == -1) {
+		perror("read");
+		exit(errno);
+	}
+	irqcount++;
+	fprintf(stderr, " okay. Alarm rang.\n");
+
+	/* Disable alarm interrupts */
+	retval = ioctl(fd, RTC_AIE_OFF, 0);
+	if (retval == -1) {
+		perror("RTC_AIE_OFF ioctl");
+		exit(errno);
+	}
+
+test_PIE:
+	/* Read periodic IRQ rate */
+	retval = ioctl(fd, RTC_IRQP_READ, &tmp);
+	if (retval == -1) {
+		/* not all RTCs support periodic IRQs */
+		if (errno == ENOTTY) {
+			fprintf(stderr, "\nNo periodic IRQ support\n");
+			goto done;
+		}
+		perror("RTC_IRQP_READ ioctl");
+		exit(errno);
+	}
+	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
+
+	fprintf(stderr, "Counting 20 interrupts at:");
+	fflush(stderr);
+
+	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+	for (tmp=2; tmp<=64; tmp*=2) {
+
+		retval = ioctl(fd, RTC_IRQP_SET, tmp);
+		if (retval == -1) {
+			/* not all RTCs can change their periodic IRQ rate */
+			if (errno == ENOTTY) {
+				fprintf(stderr,
+					"\n...Periodic IRQ rate is fixed\n");
+				goto done;
+			}
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
+		}
+
+		fprintf(stderr, "\n%ldHz:\t", tmp);
+		fflush(stderr);
+
+		/* Enable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_ON, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
+		}
+
+		for (i=1; i<21; i++) {
+			gettimeofday(&start, NULL);
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			gettimeofday(&end, NULL);
+			timersub(&end, &start, &diff);
+			if (diff.tv_sec > 0 ||
+			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+				       diff.tv_sec, diff.tv_usec,
+				       (1000000L / tmp));
+				fflush(stdout);
+				exit(-1);
+			}
+
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
+		}
+
+		/* Disable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_OFF, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
+		}
+	}
+
+done:
+	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644
index 0000000..c8a7e14
--- /dev/null
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -0,0 +1,144 @@
+/* Time bounds setting test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which sets the time to edge cases then
+ *  uses other tests to detect problems. Thus this test requires that
+ *  the inconsistency-check and nanosleep tests be present in the same
+ *  directory it is run from.
+ *
+ *  To build:
+ *	$ gcc set-2038.c -o set-2038 -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX	((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX	(KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL			/*overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX			/*overflows 64bit ktime_t */
+#define YEAR_MAX  ((long long)((1ULL<<63)-1))	/*overflows 64bit time_t */
+
+int is32bits(void)
+{
+	return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+	struct timeval now;
+	int ret;
+
+	now.tv_sec = (time_t)time;
+	now.tv_usec  = 0;
+
+	ret = settimeofday(&now, NULL);
+
+	printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+	return ret;
+}
+
+int do_tests(void)
+{
+	int ret;
+
+	ret = system("date");
+	ret = system("./inconsistency-check -c 0 -t 20");
+	ret |= system("./nanosleep");
+	ret |= system("./nsleep-lat");
+	return ret;
+
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+	int opt, dangerous = 0;
+	time_t start;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "d")) != -1) {
+		switch (opt) {
+		case 'd':
+			dangerous = 1;
+		}
+	}
+
+	start = time(0);
+
+	/* First test that crazy values don't work */
+	if (!settime(YEAR_1901)) {
+		ret = -1;
+		goto out;
+	}
+	if (!settime(YEAR_MAX)) {
+		ret = -1;
+		goto out;
+	}
+	if (!is32bits() && !settime(YEAR_2262)) {
+		ret = -1;
+		goto out;
+	}
+
+	/* Now test behavior near edges */
+	settime(YEAR_1970);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	settime(YEAR_2038 - 600);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	/* The rest of the tests can blowup on 32bit systems */
+	if (is32bits() && !dangerous)
+		goto out;
+	/* Test rollover behavior 32bit edge */
+	settime(YEAR_2038 - 10);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	settime(YEAR_2262 - 600);
+	ret = do_tests();
+
+out:
+	/* restore clock */
+	settime(start);
+	if (ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644
index 0000000..dc88dbc
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -0,0 +1,79 @@
+/* Set tai offset
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+int set_tai(int offset)
+{
+	struct timex tx;
+
+	memset(&tx, 0, sizeof(tx));
+
+	tx.modes = ADJ_TAI;
+	tx.constant = offset;
+
+	return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+	struct timex tx;
+
+	memset(&tx, 0, sizeof(tx));
+
+	adjtimex(&tx);
+	return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+	int i, ret;
+
+	ret = get_tai();
+	printf("tai offset started at %i\n", ret);
+
+	printf("Checking tai offsets can be properly set: ");
+	for (i = 1; i <= 60; i++) {
+		ret = set_tai(i);
+		ret = get_tai();
+		if (ret != i) {
+			printf("[FAILED] expected: %i got %i\n", i, ret);
+			return ksft_exit_fail();
+		}
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644
index 0000000..4fc98c5
--- /dev/null
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -0,0 +1,216 @@
+/* set_timer latency test
+ *		John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2014
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the set_timer api is correct
+ *
+ *  To build:
+ *	$ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+
+void sigalarm(int signo)
+{
+	long long delta_ns;
+	struct timespec ts;
+
+	clock_gettime(clock_id, &ts);
+	alarmcount++;
+
+	delta_ns = timespec_sub(start_time, ts);
+	delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+	if (delta_ns < 0)
+		printf("%s timer fired early: FAIL\n", clockstring(clock_id));
+
+	if (delta_ns > max_latency_ns)
+		max_latency_ns = delta_ns;
+}
+
+int do_timer(int clock_id, int flags)
+{
+	struct sigevent se;
+	timer_t tm1;
+	struct itimerspec its1, its2;
+	int err;
+
+	/* Set up timer: */
+	memset(&se, 0, sizeof(se));
+	se.sigev_notify = SIGEV_SIGNAL;
+	se.sigev_signo = SIGRTMAX;
+	se.sigev_value.sival_int = 0;
+
+	max_latency_ns = 0;
+	alarmcount = 0;
+
+	err = timer_create(clock_id, &se, &tm1);
+	if (err) {
+		if ((clock_id == CLOCK_REALTIME_ALARM) ||
+		    (clock_id == CLOCK_BOOTTIME_ALARM)) {
+			printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
+					clockstring(clock_id),
+					flags ? "ABSTIME":"RELTIME");
+			return 0;
+		}
+		printf("%s - timer_create() failed\n", clockstring(clock_id));
+		return -1;
+	}
+
+	clock_gettime(clock_id, &start_time);
+	if (flags) {
+		its1.it_value = start_time;
+		its1.it_value.tv_sec += TIMER_SECS;
+	} else {
+		its1.it_value.tv_sec = TIMER_SECS;
+		its1.it_value.tv_nsec = 0;
+	}
+	its1.it_interval.tv_sec = TIMER_SECS;
+	its1.it_interval.tv_nsec = 0;
+
+	err = timer_settime(tm1, flags, &its1, &its2);
+	if (err) {
+		printf("%s - timer_settime() failed\n", clockstring(clock_id));
+		return -1;
+	}
+
+	while (alarmcount < 5)
+		sleep(1);
+
+	printf("%-22s %s max latency: %10lld ns : ",
+			clockstring(clock_id),
+			flags ? "ABSTIME":"RELTIME",
+			max_latency_ns);
+
+	timer_delete(tm1);
+	if (max_latency_ns < UNRESONABLE_LATENCY) {
+		printf("[OK]\n");
+		return 0;
+	}
+	printf("[FAILED]\n");
+	return -1;
+}
+
+int main(void)
+{
+	struct sigaction act;
+	int signum = SIGRTMAX;
+	int ret = 0;
+
+	/* Set up signal handler: */
+	sigfillset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = sigalarm;
+	sigaction(signum, &act, NULL);
+
+	printf("Setting timers for every %i seconds\n", TIMER_SECS);
+	for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+		if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+				(clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+				(clock_id == CLOCK_MONOTONIC_RAW) ||
+				(clock_id == CLOCK_REALTIME_COARSE) ||
+				(clock_id == CLOCK_MONOTONIC_COARSE) ||
+				(clock_id == CLOCK_HWSPECIFIC))
+			continue;
+
+		ret |= do_timer(clock_id, TIMER_ABSTIME);
+		ret |= do_timer(clock_id, 0);
+	}
+	if (ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644
index 0000000..5562f84
--- /dev/null
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -0,0 +1,89 @@
+/* ADJ_FREQ Skew consistency test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ *  and forth and watches for consistency problems. Thus this test requires
+ *  that the inconsistency-check tests be present in the same directory it
+ *  is run from.
+ *
+ *  To build:
+ *	$ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argv, char **argc)
+{
+	struct timex tx;
+	int ret, ppm;
+	pid_t pid;
+
+
+	printf("Running Asyncrhonous Frequency Changing Tests...\n");
+
+	pid = fork();
+	if (!pid)
+		return system("./inconsistency-check -c 1 -t 600");
+
+	ppm = 500;
+	ret = 0;
+
+	while (pid != waitpid(pid, &ret, WNOHANG)) {
+		ppm = -ppm;
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = ppm << 16;
+		adjtimex(&tx);
+		usleep(500000);
+	}
+
+	/* Set things back */
+	tx.modes = ADJ_FREQUENCY;
+	tx.offset = 0;
+	adjtimex(&tx);
+
+
+	if (ret) {
+		printf("[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644
index 0000000..e632e11
--- /dev/null
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -0,0 +1,204 @@
+/* threadtest.c
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2004, 2005, 2006, 2012
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc threadtest.c -o threadtest -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+	int i, j;
+	struct timespec *a, *b;
+
+	/* scan the list */
+	for (i = 0; i < size-1; i++) {
+		a = &list[i];
+		b = &list[i+1];
+
+		/* look for any time inconsistencies */
+		if ((b->tv_sec <= a->tv_sec) &&
+			(b->tv_nsec < a->tv_nsec)) {
+
+			/* flag other threads */
+			done = 1;
+
+			/*serialize printing to avoid junky output*/
+			pthread_mutex_lock(&print_lock);
+
+			/* dump the list */
+			printf("\n");
+			for (j = 0; j < size; j++) {
+				if (j == i)
+					printf("---------------\n");
+				printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+				if (j == i+1)
+					printf("---------------\n");
+			}
+			printf("[FAILED]\n");
+
+			pthread_mutex_unlock(&print_lock);
+		}
+	}
+}
+
+/* The shared thread shares a global list
+ * that each thread fills while holding the lock.
+ * This stresses clock syncronization across cpus.
+ */
+void *shared_thread(void *arg)
+{
+	while (!done) {
+		/* protect the list */
+		pthread_mutex_lock(&list_lock);
+
+		/* see if we're ready to check the list */
+		if (listcount >= LISTSIZE) {
+			checklist(global_list, LISTSIZE);
+			listcount = 0;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+		pthread_mutex_unlock(&list_lock);
+	}
+	return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+	struct timespec my_list[LISTSIZE];
+	int count;
+
+	while (!done) {
+		/* fill the list */
+		for (count = 0; count < LISTSIZE; count++)
+			clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+		checklist(my_list, LISTSIZE);
+	}
+	return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+	int thread_count, i;
+	time_t start, now, runtime;
+	char buf[255];
+	pthread_t pth[MAX_THREADS];
+	int opt;
+	void *tret;
+	int ret = 0;
+	void *(*thread)(void *) = shared_thread;
+
+	thread_count = DEFAULT_THREAD_COUNT;
+	runtime = DEFAULT_RUNTIME;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+		switch (opt) {
+		case 't':
+			runtime = atoi(optarg);
+			break;
+		case 'n':
+			thread_count = atoi(optarg);
+			break;
+		case 'i':
+			thread = independent_thread;
+			printf("using independent threads\n");
+			break;
+		default:
+			printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+			printf("	-t: time to run\n");
+			printf("	-n: number of threads\n");
+			printf("	-i: use independent threads\n");
+			return -1;
+		}
+	}
+
+	if (thread_count > MAX_THREADS)
+		thread_count = MAX_THREADS;
+
+
+	setbuf(stdout, NULL);
+
+	start = time(0);
+	strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+	printf("%s\n", buf);
+	printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+
+	/* spawn */
+	for (i = 0; i < thread_count; i++)
+		pthread_create(&pth[i], 0, thread, 0);
+
+	while (time(&now) < start + runtime) {
+		sleep(1);
+		if (done) {
+			ret = 1;
+			strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+			printf("%s\n", buf);
+			goto out;
+		}
+	}
+	printf("[OK]\n");
+	done = 1;
+
+out:
+	/* wait */
+	for (i = 0; i < thread_count; i++)
+		pthread_join(pth[i], &tret);
+
+	/* die */
+	if (ret)
+		ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644
index 0000000..e86d937
--- /dev/null
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -0,0 +1,202 @@
+/* valid adjtimex test
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2015
+ *              Licensed under the GPLv2
+ *
+ *  This test validates adjtimex interface with valid
+ *  and invalid test data.
+ *
+ *  Usage: valid-adjtimex
+ *
+ *  To build:
+ *	$ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+	exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+	exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000L
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+	return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+long valid_freq[NUM_FREQ_VALID] = {
+	-499<<16,
+	-450<<16,
+	-400<<16,
+	-350<<16,
+	-300<<16,
+	-250<<16,
+	-200<<16,
+	-150<<16,
+	-100<<16,
+	-75<<16,
+	-50<<16,
+	-25<<16,
+	-10<<16,
+	-5<<16,
+	-1<<16,
+	-1000,
+	1<<16,
+	5<<16,
+	10<<16,
+	25<<16,
+	50<<16,
+	75<<16,
+	100<<16,
+	150<<16,
+	200<<16,
+	250<<16,
+	300<<16,
+	350<<16,
+	400<<16,
+	450<<16,
+	499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+	-1000<<16,
+	-550<<16,
+	550<<16,
+	1000<<16,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+	LONG_MAX,
+	LONG_MIN,
+};
+
+int validate_freq(void)
+{
+	struct timex tx;
+	int ret, pass = 0;
+	int i;
+
+	clear_time_state();
+
+	memset(&tx, 0, sizeof(struct timex));
+	/* Set the leap second insert flag */
+
+	printf("Testing ADJ_FREQ... ");
+	for (i = 0; i < NUM_FREQ_VALID; i++) {
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = valid_freq[i];
+
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+				valid_freq[i], valid_freq[i]>>16);
+			pass = -1;
+			goto out;
+		}
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.freq != valid_freq[i]) {
+			printf("Warning: freq value %ld not what we set it (%ld)!\n",
+					tx.freq, valid_freq[i]);
+		}
+	}
+	for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = outofrange_freq[i];
+
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+				outofrange_freq[i], outofrange_freq[i]>>16);
+			pass = -1;
+			goto out;
+		}
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.freq == outofrange_freq[i]) {
+			printf("[FAIL]\n");
+			printf("ERROR: out of range value %ld actually set!\n",
+					tx.freq);
+			pass = -1;
+			goto out;
+		}
+	}
+
+
+	if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+		for (i = 0; i < NUM_FREQ_INVALID; i++) {
+			tx.modes = ADJ_FREQUENCY;
+			tx.freq = invalid_freq[i];
+			ret = adjtimex(&tx);
+			if (ret >= 0) {
+				printf("[FAIL]\n");
+				printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+					invalid_freq[i]);
+				pass = -1;
+				goto out;
+			}
+		}
+	}
+
+	printf("[OK]\n");
+out:
+	/* reset freq to zero */
+	tx.modes = ADJ_FREQUENCY;
+	tx.freq = 0;
+	ret = adjtimex(&tx);
+
+	return pass;
+}
+
+
+int main(int argc, char **argv)
+{
+	if (validate_freq())
+		return ksft_exit_fail();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
index 12c9d15..d401b63 100644
--- a/tools/testing/selftests/user/Makefile
+++ b/tools/testing/selftests/user/Makefile
@@ -3,5 +3,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-run_tests: all
-	./test_user_copy.sh
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 077828c..a5ce953 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,5 @@
 # Makefile for vm selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
 BINARIES += transhuge-stress
@@ -9,8 +8,10 @@
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^ -lrt
 
-run_tests: all
-	@/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1)
+TEST_PROGS := run_vmtests
+TEST_FILES := $(BINARIES)
+
+include ../lib.mk
 
 clean:
 	$(RM) $(BINARIES)
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644
index 0000000..15034fe
--- /dev/null
+++ b/tools/testing/selftests/x86/.gitignore
@@ -0,0 +1,2 @@
+*_32
+*_64
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644
index 0000000..f0a7918
--- /dev/null
+++ b/tools/testing/selftests/x86/Makefile
@@ -0,0 +1,48 @@
+.PHONY: all all_32 all_64 check_build32 clean run_tests
+
+TARGETS_C_BOTHBITS := sigreturn
+
+BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
+BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+UNAME_P := $(shell uname -p)
+
+# Always build 32-bit tests
+all: all_32
+
+# If we're on a 64-bit host, build 64-bit tests as well
+ifeq ($(shell uname -p),x86_64)
+all: all_64
+endif
+
+all_32: check_build32 $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+clean:
+	$(RM) $(BINARIES_32) $(BINARIES_64)
+
+run_tests:
+	./run_x86_tests.sh
+
+$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c
+	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+check_build32:
+	@if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then	\
+	  echo "Warning: you seem to have a broken 32-bit build" 2>&1; 	\
+	  echo "environment.  If you are using a Debian-like";		\
+	  echo " distribution, try:"; 					\
+	  echo "";							\
+	  echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+	  echo "";							\
+	  echo "If you are using a Fedora-like distribution, try:";	\
+	  echo "";							\
+	  echo "  yum install glibc-devel.*i686";			\
+	  exit 1;							\
+	fi
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
new file mode 100644
index 0000000..3d3ec65
--- /dev/null
+++ b/tools/testing/selftests/x86/run_x86_tests.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# This is deliberately minimal.  IMO kselftests should provide a standard
+# script here.
+./sigreturn_32 || exit 1
+
+if [[ "$uname -p" -eq "x86_64" ]]; then
+    ./sigreturn_64 || exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644
index 0000000..b5aa1ba
--- /dev/null
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -0,0 +1,684 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test is documented below.
+ *
+ * Do not run on outdated, unpatched kernels at risk of nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation.  (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline.  Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+     ".type int3, @function\n\t"
+     ".align 4096\n\t"
+     "int3:\n\t"
+     "mov %ss,%eax\n\t"
+     "int3\n\t"
+     ".size int3, . - int3\n\t"
+     ".align 4096, 0xcc\n\t"
+     ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepapre:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ *   descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ *   stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table.  These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+static unsigned short GDT3(int idx)
+{
+	return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+	return (idx << 3) | 7;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+		    const char *name)
+{
+	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+		*var = LDT3(desc->entry_number);
+	} else {
+		printf("[NOTE]\tFailed to create %s segment\n", name);
+		*var = 0;
+	}
+}
+
+static void setup_ldt(void)
+{
+	if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+		errx(1, "stack16 is too high\n");
+	if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+		errx(1, "int3 is too high\n");
+
+	ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+	const struct user_desc code16_desc = {
+		.entry_number    = LDT_OFFSET + 0,
+		.base_addr       = (unsigned long)int3,
+		.limit           = 4095,
+		.seg_32bit       = 0,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	add_ldt(&code16_desc, &code16_sel, "code16");
+
+	const struct user_desc data16_desc = {
+		.entry_number    = LDT_OFFSET + 1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 0,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	add_ldt(&data16_desc, &data16_sel, "data16");
+
+	const struct user_desc npcode32_desc = {
+		.entry_number    = LDT_OFFSET + 3,
+		.base_addr       = (unsigned long)int3,
+		.limit           = 4095,
+		.seg_32bit       = 1,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+	add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+	const struct user_desc npdata32_desc = {
+		.entry_number    = LDT_OFFSET + 4,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+	add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+	struct user_desc gdt_data16_desc = {
+		.entry_number    = -1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 0,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+
+	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+		/*
+		 * This probably indicates vulnerability to CVE-2014-8133.
+		 * Merely getting here isn't definitive, though, and we'll
+		 * diagnose the problem for real later on.
+		 */
+		printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+		       gdt_data16_desc.entry_number);
+		gdt_data16_idx = gdt_data16_desc.entry_number;
+	} else {
+		printf("[OK]\tset_thread_area refused 16-bit data\n");
+	}
+
+	struct user_desc gdt_npdata32_desc = {
+		.entry_number    = -1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+
+	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+		/*
+		 * As a hardening measure, newer kernels don't allow this.
+		 */
+		printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+		       gdt_npdata32_desc.entry_number);
+		gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+	} else {
+		printf("[OK]\tset_thread_area refused 16-bit data\n");
+	}
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_AX REG_RAX
+
+struct selectors {
+	unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+	return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+	return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_AX REG_EAX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+	return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+	return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	*csptr(ctx) = sig_cs;
+	*ssptr(ctx) = sig_ss;
+
+	ctx->uc_mcontext.gregs[REG_IP] =
+		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+	ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
+
+	return;
+}
+
+/*
+ * Called after a successful sigreturn.  Restores our state so that
+ * the original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+	unsigned short ss;
+	asm ("mov %%ss,%0" : "=r" (ss));
+
+	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+	if (asm_ss != sig_ss && sig == SIGTRAP) {
+		/* Sanity check failure. */
+		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
+		nerrs++;
+	}
+
+	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+	sig_trapped = sig;
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+	uint32_t valid = 0, ar;
+	asm ("lar %[cs], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "mov $1, %[valid]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [valid] "+rm" (valid)
+	     : [cs] "r" (cs));
+
+	if (!valid)
+		return -1;
+
+	bool db = (ar & (1 << 22));
+	bool l = (ar & (1 << 21));
+
+	if (!(ar & (1<<11)))
+	    return -1;	/* Not code. */
+
+	if (l && !db)
+		return 64;
+	else if (!l && db)
+		return 32;
+	else if (!l && !db)
+		return 16;
+	else
+		return -1;	/* Unknown bitness. */
+}
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+	unsigned short my_cs;
+
+	asm ("mov %%cs,%0" :  "=r" (my_cs));
+
+	if (cs_bitness(my_cs) == bitness)
+		return my_cs;
+	if (cs_bitness(my_cs + (2 << 3)) == bitness)
+		return my_cs + (2 << 3);
+	if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+	    return my_cs - (2 << 3);
+	if (cs_bitness(code16_sel) == bitness)
+		return code16_sel;
+
+	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+	return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+	int cs = find_cs(cs_bits);
+	if (cs == -1) {
+		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+		       cs_bits, use_16bit_ss ? 16 : 32);
+		return 0;
+	}
+
+	if (force_ss != -1) {
+		sig_ss = force_ss;
+	} else {
+		if (use_16bit_ss) {
+			if (!data16_sel) {
+				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+				       cs_bits);
+				return 0;
+			}
+			sig_ss = data16_sel;
+		} else {
+			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+		}
+	}
+
+	sig_cs = cs;
+
+	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+	       (sig_ss & 4) ? "" : ", GDT");
+
+	raise(SIGUSR1);
+
+	nerrs = 0;
+
+	/*
+	 * Check that each register had an acceptable value when the
+	 * int3 trampoline was invoked.
+	 */
+	for (int i = 0; i < NGREG; i++) {
+		greg_t req = requested_regs[i], res = resulting_regs[i];
+		if (i == REG_TRAPNO || i == REG_IP)
+			continue;	/* don't care */
+		if (i == REG_SP) {
+			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
+			       (unsigned long long)res);
+
+			/*
+			 * In many circumstances, the high 32 bits of rsp
+			 * are zeroed.  For example, we could be a real
+			 * 32-bit program, or we could hit any of a number
+			 * of poorly-documented IRET or segmented ESP
+			 * oddities.  If this happens, it's okay.
+			 */
+			if (res == (req & 0xFFFFFFFF))
+				continue;  /* OK; not expected to work */
+		}
+
+		bool ignore_reg = false;
+#if __i386__
+		if (i == REG_UESP)
+			ignore_reg = true;
+#else
+		if (i == REG_CSGSFS) {
+			struct selectors *req_sels =
+				(void *)&requested_regs[REG_CSGSFS];
+			struct selectors *res_sels =
+				(void *)&resulting_regs[REG_CSGSFS];
+			if (req_sels->cs != res_sels->cs) {
+				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+				       req_sels->cs, res_sels->cs);
+				nerrs++;
+			}
+
+			if (req_sels->ss != res_sels->ss) {
+				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+				       req_sels->ss, res_sels->ss);
+				nerrs++;
+			}
+
+			continue;
+		}
+#endif
+
+		/* Sanity check on the kernel */
+		if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
+			printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
+		}
+
+		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
+			/*
+			 * SP is particularly interesting here.  The
+			 * usual cause of failures is that we hit the
+			 * nasty IRET case of returning to a 16-bit SS,
+			 * in which case bits 16:31 of the *kernel*
+			 * stack pointer persist in ESP.
+			 */
+			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+			       i, (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+		}
+	}
+
+	if (nerrs == 0)
+		printf("[OK]\tall registers okay\n");
+
+	return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+	if (cs == -1)
+		return 0;
+
+	sig_cs = cs;
+	sig_ss = ss;
+
+	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+	       cs_bits, sig_cs, sig_ss);
+
+	sig_trapped = 0;
+	raise(SIGUSR1);
+	if (sig_trapped) {
+		char errdesc[32] = "";
+		if (sig_err) {
+			const char *src = (sig_err & 1) ? " EXT" : "";
+			const char *table;
+			if ((sig_err & 0x6) == 0x0)
+				table = "GDT";
+			else if ((sig_err & 0x6) == 0x4)
+				table = "LDT";
+			else if ((sig_err & 0x6) == 0x2)
+				table = "IDT";
+			else
+				table = "???";
+
+			sprintf(errdesc, "%s%s index %d, ",
+				table, src, sig_err >> 3);
+		}
+
+		char trapname[32];
+		if (sig_trapno == 13)
+			strcpy(trapname, "GP");
+		else if (sig_trapno == 11)
+			strcpy(trapname, "NP");
+		else if (sig_trapno == 12)
+			strcpy(trapname, "SS");
+		else if (sig_trapno == 32)
+			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
+		else
+			sprintf(trapname, "%d", sig_trapno);
+
+		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+		       trapname, (unsigned long)sig_err,
+		       errdesc, strsignal(sig_trapped));
+		return 0;
+	} else {
+		printf("[FAIL]\tDid not get SIGSEGV\n");
+		return 1;
+	}
+}
+
+int main()
+{
+	int total_nerrs = 0;
+	unsigned short my_cs, my_ss;
+
+	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+	setup_ldt();
+
+	stack_t stack = {
+		.ss_sp = altstack_data,
+		.ss_size = SIGSTKSZ,
+	};
+	if (sigaltstack(&stack, NULL) != 0)
+		err(1, "sigaltstack");
+
+	sethandler(SIGUSR1, sigusr1, 0);
+	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+	total_nerrs += test_valid_sigreturn(64, false, -1);
+	total_nerrs += test_valid_sigreturn(32, false, -1);
+	total_nerrs += test_valid_sigreturn(16, false, -1);
+
+	/*
+	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
+	 *
+	 * This catches the original missing-espfix-on-64-bit-kernels issue
+	 * as well as CVE-2014-8134.
+	 */
+	total_nerrs += test_valid_sigreturn(64, true, -1);
+	total_nerrs += test_valid_sigreturn(32, true, -1);
+	total_nerrs += test_valid_sigreturn(16, true, -1);
+
+	if (gdt_data16_idx) {
+		/*
+		 * For performance reasons, Linux skips espfix if SS points
+		 * to the GDT.  If we were able to allocate a 16-bit SS in
+		 * the GDT, see if it leaks parts of the kernel stack pointer.
+		 *
+		 * This tests for CVE-2014-8133.
+		 */
+		total_nerrs += test_valid_sigreturn(64, true,
+						    GDT3(gdt_data16_idx));
+		total_nerrs += test_valid_sigreturn(32, true,
+						    GDT3(gdt_data16_idx));
+		total_nerrs += test_valid_sigreturn(16, true,
+						    GDT3(gdt_data16_idx));
+	}
+
+	/*
+	 * We're done testing valid sigreturn cases.  Now we test states
+	 * for which sigreturn itself will succeed but the subsequent
+	 * entry to user mode will fail.
+	 *
+	 * Depending on the failure mode and the kernel bitness, these
+	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+	 */
+	clearhandler(SIGTRAP);
+	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
+
+	/* Easy failures: invalid SS, resulting in #GP(0) */
+	test_bad_iret(64, ldt_nonexistent_sel, -1);
+	test_bad_iret(32, ldt_nonexistent_sel, -1);
+	test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
+	test_bad_iret(64, my_cs, -1);
+	test_bad_iret(32, my_cs, -1);
+	test_bad_iret(16, my_cs, -1);
+
+	/* Try to return to a not-present code segment, triggering #NP(SS). */
+	test_bad_iret(32, my_ss, npcode32_sel);
+
+	/*
+	 * Try to return to a not-present but otherwise valid data segment.
+	 * This will cause IRET to fail with #SS on the espfix stack.  This
+	 * exercises CVE-2014-9322.
+	 *
+	 * Note that, if espfix is enabled, 64-bit Linux will lose track
+	 * of the actual cause of failure and report #GP(0) instead.
+	 * This would be very difficult for Linux to avoid, because
+	 * espfix64 causes IRET failures to be promoted to #DF, so the
+	 * original exception frame is never pushed onto the stack.
+	 */
+	test_bad_iret(32, npdata32_sel, -1);
+
+	/*
+	 * Try to return to a not-present but otherwise valid data
+	 * segment without invoking espfix.  Newer kernels don't allow
+	 * this to happen in the first place.  On older kernels, though,
+	 * this can trigger CVE-2014-9322.
+	 */
+	if (gdt_npdata32_idx)
+		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+	return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644
index 0000000..2e231be
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -0,0 +1,14 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#include <stdio.h>
+
+int main()
+{
+	printf("\n");
+
+	return 0;
+}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 6e54f35..98c95f2 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -85,13 +85,22 @@
 	return IRQ_HANDLED;
 }
 
+/*
+ * Work function for handling the backup timer that we schedule when a vcpu is
+ * no longer running, but had a timer programmed to fire in the future.
+ */
 static void kvm_timer_inject_irq_work(struct work_struct *work)
 {
 	struct kvm_vcpu *vcpu;
 
 	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
 	vcpu->arch.timer_cpu.armed = false;
-	kvm_timer_inject_irq(vcpu);
+
+	/*
+	 * If the vcpu is blocked we want to wake it up so that it will see
+	 * the timer has expired when entering the guest.
+	 */
+	kvm_vcpu_kick(vcpu);
 }
 
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
@@ -102,6 +111,21 @@
 	return HRTIMER_NORESTART;
 }
 
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	cycle_t cval, now;
+
+	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+		return false;
+
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	return cval <= now;
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
@@ -119,6 +143,13 @@
 	 * populate the CPU timer again.
 	 */
 	timer_disarm(timer);
+
+	/*
+	 * If the timer expired while we were not scheduled, now is the time
+	 * to inject it.
+	 */
+	if (kvm_timer_should_fire(vcpu))
+		kvm_timer_inject_irq(vcpu);
 }
 
 /**
@@ -134,16 +165,9 @@
 	cycle_t cval, now;
 	u64 ns;
 
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
-		return;
-
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
 	BUG_ON(timer_is_armed(timer));
 
-	if (cval <= now) {
+	if (kvm_timer_should_fire(vcpu)) {
 		/*
 		 * Timer has already expired while we were not
 		 * looking. Inject the interrupt and carry on.
@@ -152,6 +176,9 @@
 		return;
 	}
 
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
 	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
 				 &timecounter->frac);
 	timer_arm(timer, ns);
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 19c6210..1390797 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -107,6 +107,22 @@
 					     vcpu->vcpu_id);
 }
 
+static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t offset)
+{
+	return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+					  vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
+					 struct kvm_exit_mmio *mmio,
+					 phys_addr_t offset)
+{
+	return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+					    vcpu->vcpu_id);
+}
+
 static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 				     struct kvm_exit_mmio *mmio,
 				     phys_addr_t offset)
@@ -303,7 +319,7 @@
 		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
-static const struct kvm_mmio_range vgic_dist_ranges[] = {
+static const struct vgic_io_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
@@ -344,13 +360,13 @@
 		.base		= GIC_DIST_ACTIVE_SET,
 		.len		= VGIC_MAX_IRQS / 8,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_set_active_reg,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_CLEAR,
 		.len		= VGIC_MAX_IRQS / 8,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_clear_active_reg,
 	},
 	{
 		.base		= GIC_DIST_PRI,
@@ -388,24 +404,6 @@
 	{}
 };
 
-static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-				struct kvm_exit_mmio *mmio)
-{
-	unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
-
-	if (!is_in_range(mmio->phys_addr, mmio->len, base,
-			 KVM_VGIC_V2_DIST_SIZE))
-		return false;
-
-	/* GICv2 does not support accesses wider than 32 bits */
-	if (mmio->len > 4) {
-		kvm_inject_dabt(vcpu, mmio->phys_addr);
-		return true;
-	}
-
-	return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
-}
-
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -490,6 +488,7 @@
 static int vgic_v2_map_resources(struct kvm *kvm,
 				 const struct vgic_params *params)
 {
+	struct vgic_dist *dist = &kvm->arch.vgic;
 	int ret = 0;
 
 	if (!irqchip_in_kernel(kvm))
@@ -500,13 +499,17 @@
 	if (vgic_ready(kvm))
 		goto out;
 
-	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
 		kvm_err("Need to set vgic cpu and dist addresses first\n");
 		ret = -ENXIO;
 		goto out;
 	}
 
+	vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+				 KVM_VGIC_V2_DIST_SIZE,
+				 vgic_dist_ranges, -1, &dist->dist_iodev);
+
 	/*
 	 * Initialize the vgic if this hasn't already been done on demand by
 	 * accessing the vgic state from userspace.
@@ -514,18 +517,23 @@
 	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to allocate maps\n");
-		goto out;
+		goto out_unregister;
 	}
 
-	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+	ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
 				    params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
 				    true);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
-		goto out;
+		goto out_unregister;
 	}
 
-	kvm->arch.vgic.ready = true;
+	dist->ready = true;
+	goto out;
+
+out_unregister:
+	kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+
 out:
 	if (ret)
 		kvm_vgic_destroy(kvm);
@@ -554,7 +562,6 @@
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 
-	dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
 	dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
 	dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
 	dist->vm_ops.init_model = vgic_v2_init_model;
@@ -631,7 +638,7 @@
  * CPU Interface Register accesses - these are not accessed by the VM, but by
  * user space for saving and restoring VGIC state.
  */
-static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+static const struct vgic_io_range vgic_cpu_ranges[] = {
 	{
 		.base		= GIC_CPU_CTRL,
 		.len		= 12,
@@ -658,12 +665,13 @@
 				 struct kvm_device_attr *attr,
 				 u32 *reg, bool is_write)
 {
-	const struct kvm_mmio_range *r = NULL, *ranges;
+	const struct vgic_io_range *r = NULL, *ranges;
 	phys_addr_t offset;
 	int ret, cpuid, c;
 	struct kvm_vcpu *vcpu, *tmp_vcpu;
 	struct vgic_dist *vgic;
 	struct kvm_exit_mmio mmio;
+	u32 data;
 
 	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
@@ -685,6 +693,7 @@
 
 	mmio.len = 4;
 	mmio.is_write = is_write;
+	mmio.data = &data;
 	if (is_write)
 		mmio_data_write(&mmio, ~0, *reg);
 	switch (attr->group) {
@@ -699,7 +708,7 @@
 	default:
 		BUG();
 	}
-	r = vgic_find_range(ranges, &mmio, offset);
+	r = vgic_find_range(ranges, 4, offset);
 
 	if (unlikely(!r || !r->handle_mmio)) {
 		ret = -ENXIO;
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index b3f1546..e9c3a7a 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -340,7 +340,7 @@
 	return false;
 }
 
-static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+static const struct vgic_io_range vgic_v3_dist_ranges[] = {
 	{
 		.base           = GICD_CTLR,
 		.len            = 0x04,
@@ -502,6 +502,43 @@
 	{},
 };
 
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+				    struct kvm_exit_mmio *mmio,
+				    phys_addr_t offset)
+{
+	/* since we don't support LPIs, this register is zero for now */
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 reg;
+	u64 mpidr;
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+	int target_vcpu_id = redist_vcpu->vcpu_id;
+
+	/* the upper 32 bits contain the affinity value */
+	if ((offset & ~3) == 4) {
+		mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+		reg = compress_mpidr(mpidr);
+
+		vgic_reg_access(mmio, &reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = redist_vcpu->vcpu_id << 8;
+	if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+		reg |= GICR_TYPER_LAST;
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
 static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
 					      struct kvm_exit_mmio *mmio,
 					      phys_addr_t offset)
@@ -570,113 +607,9 @@
 	return vgic_handle_cfg_reg(reg, mmio, offset);
 }
 
-static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
-	{
-		.base		= GICR_IGROUPR0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_rao_wi,
-	},
-	{
-		.base		= GICR_ISENABLER0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_set_enable_reg_redist,
-	},
-	{
-		.base		= GICR_ICENABLER0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_clear_enable_reg_redist,
-	},
-	{
-		.base		= GICR_ISPENDR0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_set_pending_reg_redist,
-	},
-	{
-		.base		= GICR_ICPENDR0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_clear_pending_reg_redist,
-	},
-	{
-		.base		= GICR_ISACTIVER0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GICR_ICACTIVER0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GICR_IPRIORITYR0,
-		.len		= 0x20,
-		.bits_per_irq	= 8,
-		.handle_mmio	= handle_mmio_priority_reg_redist,
-	},
-	{
-		.base		= GICR_ICFGR0,
-		.len		= 0x08,
-		.bits_per_irq	= 2,
-		.handle_mmio	= handle_mmio_cfg_reg_redist,
-	},
-	{
-		.base		= GICR_IGRPMODR0,
-		.len		= 0x04,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GICR_NSACR,
-		.len		= 0x04,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{},
-};
+#define SGI_base(x) ((x) + SZ_64K)
 
-static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
-				    struct kvm_exit_mmio *mmio,
-				    phys_addr_t offset)
-{
-	/* since we don't support LPIs, this register is zero for now */
-	vgic_reg_access(mmio, NULL, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
-	return false;
-}
-
-static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
-				     struct kvm_exit_mmio *mmio,
-				     phys_addr_t offset)
-{
-	u32 reg;
-	u64 mpidr;
-	struct kvm_vcpu *redist_vcpu = mmio->private;
-	int target_vcpu_id = redist_vcpu->vcpu_id;
-
-	/* the upper 32 bits contain the affinity value */
-	if ((offset & ~3) == 4) {
-		mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
-		reg = compress_mpidr(mpidr);
-
-		vgic_reg_access(mmio, &reg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		return false;
-	}
-
-	reg = redist_vcpu->vcpu_id << 8;
-	if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
-		reg |= GICR_TYPER_LAST;
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-	return false;
-}
-
-static const struct kvm_mmio_range vgic_redist_ranges[] = {
+static const struct vgic_io_range vgic_redist_ranges[] = {
 	{
 		.base           = GICR_CTLR,
 		.len            = 0x04,
@@ -707,49 +640,74 @@
 		.bits_per_irq   = 0,
 		.handle_mmio    = handle_mmio_idregs,
 	},
+	{
+		.base		= SGI_base(GICR_IGROUPR0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_rao_wi,
+	},
+	{
+		.base		= SGI_base(GICR_ISENABLER0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_ICENABLER0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_ISPENDR0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_ICPENDR0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_ISACTIVER0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= SGI_base(GICR_ICACTIVER0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= SGI_base(GICR_IPRIORITYR0),
+		.len		= 0x20,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_ICFGR0),
+		.len		= 0x08,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg_redist,
+	},
+	{
+		.base		= SGI_base(GICR_IGRPMODR0),
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= SGI_base(GICR_NSACR),
+		.len		= 0x04,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
 	{},
 };
 
-/*
- * This function splits accesses between the distributor and the two
- * redistributor parts (private/SPI). As each redistributor is accessible
- * from any CPU, we have to determine the affected VCPU by taking the faulting
- * address into account. We then pass this VCPU to the handler function via
- * the private parameter.
- */
-#define SGI_BASE_OFFSET SZ_64K
-static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-				struct kvm_exit_mmio *mmio)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long dbase = dist->vgic_dist_base;
-	unsigned long rdbase = dist->vgic_redist_base;
-	int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
-	int vcpu_id;
-	const struct kvm_mmio_range *mmio_range;
-
-	if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
-		return vgic_handle_mmio_range(vcpu, run, mmio,
-					      vgic_v3_dist_ranges, dbase);
-	}
-
-	if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
-	    GIC_V3_REDIST_SIZE * nrcpus))
-		return false;
-
-	vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
-	rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
-	mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
-
-	if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
-		rdbase += SGI_BASE_OFFSET;
-		mmio_range = vgic_redist_sgi_ranges;
-	} else {
-		mmio_range = vgic_redist_ranges;
-	}
-	return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
-}
-
 static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 {
 	if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -766,6 +724,9 @@
 {
 	int ret = 0;
 	struct vgic_dist *dist = &kvm->arch.vgic;
+	gpa_t rdbase = dist->vgic_redist_base;
+	struct vgic_io_device *iodevs = NULL;
+	int i;
 
 	if (!irqchip_in_kernel(kvm))
 		return 0;
@@ -791,7 +752,41 @@
 		goto out;
 	}
 
-	kvm->arch.vgic.ready = true;
+	ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+				       GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
+				       -1, &dist->dist_iodev);
+	if (ret)
+		goto out;
+
+	iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL);
+	if (!iodevs) {
+		ret = -ENOMEM;
+		goto out_unregister;
+	}
+
+	for (i = 0; i < dist->nr_cpus; i++) {
+		ret = vgic_register_kvm_io_dev(kvm, rdbase,
+					       SZ_128K, vgic_redist_ranges,
+					       i, &iodevs[i]);
+		if (ret)
+			goto out_unregister;
+		rdbase += GIC_V3_REDIST_SIZE;
+	}
+
+	dist->redist_iodevs = iodevs;
+	dist->ready = true;
+	goto out;
+
+out_unregister:
+	kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+	if (iodevs) {
+		for (i = 0; i < dist->nr_cpus; i++) {
+			if (iodevs[i].dev.ops)
+				kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+							  &iodevs[i].dev);
+		}
+	}
+
 out:
 	if (ret)
 		kvm_vgic_destroy(kvm);
@@ -832,7 +827,6 @@
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 
-	dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
 	dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
 	dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
 	dist->vm_ops.init_model = vgic_v3_init_model;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c9f60f5..8d550ff 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -31,6 +31,9 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <trace/events/kvm.h>
+#include <asm/kvm.h>
+#include <kvm/iodev.h>
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -263,6 +266,13 @@
 	return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+}
+
 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -277,6 +287,20 @@
 	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -520,6 +544,44 @@
 	return false;
 }
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+				struct kvm_exit_mmio *mmio,
+				phys_addr_t offset, int vcpu_id)
+{
+	u32 *reg;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+	if (mmio->is_write) {
+		vgic_update_state(kvm);
+		return true;
+	}
+
+	return false;
+}
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset, int vcpu_id)
+{
+	u32 *reg;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+	if (mmio->is_write) {
+		vgic_update_state(kvm);
+		return true;
+	}
+
+	return false;
+}
+
 static u32 vgic_cfg_expand(u16 val)
 {
 	u32 res = 0;
@@ -588,16 +650,12 @@
 }
 
 /**
- * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
  *
- * Move any pending IRQs that have already been assigned to LRs back to the
+ * Move any IRQs that have already been assigned to LRs back to the
  * emulated distributor state so that the complete emulated state can be read
  * from the main emulation structures without investigating the LRs.
- *
- * Note that IRQs in the active state in the LRs get their pending state moved
- * to the distributor but the active state stays in the LRs, because we don't
- * track the active state on the distributor side.
  */
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
@@ -613,12 +671,22 @@
 		 * 01: pending
 		 * 10: active
 		 * 11: pending and active
-		 *
-		 * If the LR holds only an active interrupt (not pending) then
-		 * just leave it alone.
 		 */
-		if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
-			continue;
+		BUG_ON(!(lr.state & LR_STATE_MASK));
+
+		/* Reestablish SGI source for pending and active IRQs */
+		if (lr.irq < VGIC_NR_SGIS)
+			add_sgi_source(vcpu, lr.irq, lr.source);
+
+		/*
+		 * If the LR holds an active (10) or a pending and active (11)
+		 * interrupt then move the active state to the
+		 * distributor tracking bit.
+		 */
+		if (lr.state & LR_STATE_ACTIVE) {
+			vgic_irq_set_active(vcpu, lr.irq);
+			lr.state &= ~LR_STATE_ACTIVE;
+		}
 
 		/*
 		 * Reestablish the pending state on the distributor and the
@@ -626,21 +694,19 @@
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set_pending(vcpu, lr.irq);
-		if (lr.irq < VGIC_NR_SGIS)
-			add_sgi_source(vcpu, lr.irq, lr.source);
-		lr.state &= ~LR_STATE_PENDING;
+		if (lr.state & LR_STATE_PENDING) {
+			vgic_dist_irq_set_pending(vcpu, lr.irq);
+			lr.state &= ~LR_STATE_PENDING;
+		}
+
 		vgic_set_lr(vcpu, i, lr);
 
 		/*
-		 * If there's no state left on the LR (it could still be
-		 * active), then the LR does not hold any useful info and can
-		 * be marked as free for other use.
+		 * Mark the LR as free for other use.
 		 */
-		if (!(lr.state & LR_STATE_MASK)) {
-			vgic_retire_lr(i, lr.irq, vcpu);
-			vgic_irq_clear_queued(vcpu, lr.irq);
-		}
+		BUG_ON(lr.state & LR_STATE_MASK);
+		vgic_retire_lr(i, lr.irq, vcpu);
+		vgic_irq_clear_queued(vcpu, lr.irq);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -648,24 +714,21 @@
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t offset)
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+				      int len, gpa_t offset)
 {
-	const struct kvm_mmio_range *r = ranges;
-
-	while (r->len) {
-		if (offset >= r->base &&
-		    (offset + mmio->len) <= (r->base + r->len))
-			return r;
-		r++;
+	while (ranges->len) {
+		if (offset >= ranges->base &&
+		    (offset + len) <= (ranges->base + ranges->len))
+			return ranges;
+		ranges++;
 	}
 
 	return NULL;
 }
 
 static bool vgic_validate_access(const struct vgic_dist *dist,
-				 const struct kvm_mmio_range *range,
+				 const struct vgic_io_range *range,
 				 unsigned long offset)
 {
 	int irq;
@@ -693,9 +756,8 @@
 static bool call_range_handler(struct kvm_vcpu *vcpu,
 			       struct kvm_exit_mmio *mmio,
 			       unsigned long offset,
-			       const struct kvm_mmio_range *range)
+			       const struct vgic_io_range *range)
 {
-	u32 *data32 = (void *)mmio->data;
 	struct kvm_exit_mmio mmio32;
 	bool ret;
 
@@ -712,91 +774,142 @@
 	mmio32.private = mmio->private;
 
 	mmio32.phys_addr = mmio->phys_addr + 4;
-	if (mmio->is_write)
-		*(u32 *)mmio32.data = data32[1];
+	mmio32.data = &((u32 *)mmio->data)[1];
 	ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
-	if (!mmio->is_write)
-		data32[1] = *(u32 *)mmio32.data;
 
 	mmio32.phys_addr = mmio->phys_addr;
-	if (mmio->is_write)
-		*(u32 *)mmio32.data = data32[0];
+	mmio32.data = &((u32 *)mmio->data)[0];
 	ret |= range->handle_mmio(vcpu, &mmio32, offset);
-	if (!mmio->is_write)
-		data32[0] = *(u32 *)mmio32.data;
 
 	return ret;
 }
 
 /**
- * vgic_handle_mmio_range - handle an in-kernel MMIO access
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
  * @vcpu:	pointer to the vcpu performing the access
- * @run:	pointer to the kvm_run structure
- * @mmio:	pointer to the data describing the access
- * @ranges:	array of MMIO ranges in a given region
- * @mmio_base:	base address of that region
+ * @this:	pointer to the KVM IO device in charge
+ * @addr:	guest physical address of the access
+ * @len:	size of the access
+ * @val:	pointer to the data region
+ * @is_write:	read or write access
  *
  * returns true if the MMIO access could be performed
  */
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-			    struct kvm_exit_mmio *mmio,
-			    const struct kvm_mmio_range *ranges,
-			    unsigned long mmio_base)
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+				   struct kvm_io_device *this, gpa_t addr,
+				   int len, void *val, bool is_write)
 {
-	const struct kvm_mmio_range *range;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct vgic_io_device *iodev = container_of(this,
+						    struct vgic_io_device, dev);
+	struct kvm_run *run = vcpu->run;
+	const struct vgic_io_range *range;
+	struct kvm_exit_mmio mmio;
 	bool updated_state;
-	unsigned long offset;
+	gpa_t offset;
 
-	offset = mmio->phys_addr - mmio_base;
-	range = vgic_find_range(ranges, mmio, offset);
+	offset = addr - iodev->addr;
+	range = vgic_find_range(iodev->reg_ranges, len, offset);
 	if (unlikely(!range || !range->handle_mmio)) {
-		pr_warn("Unhandled access %d %08llx %d\n",
-			mmio->is_write, mmio->phys_addr, mmio->len);
-		return false;
+		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
+		return -ENXIO;
 	}
 
-	spin_lock(&vcpu->kvm->arch.vgic.lock);
+	mmio.phys_addr = addr;
+	mmio.len = len;
+	mmio.is_write = is_write;
+	mmio.data = val;
+	mmio.private = iodev->redist_vcpu;
+
+	spin_lock(&dist->lock);
 	offset -= range->base;
 	if (vgic_validate_access(dist, range, offset)) {
-		updated_state = call_range_handler(vcpu, mmio, offset, range);
+		updated_state = call_range_handler(vcpu, &mmio, offset, range);
 	} else {
-		if (!mmio->is_write)
-			memset(mmio->data, 0, mmio->len);
+		if (!is_write)
+			memset(val, 0, len);
 		updated_state = false;
 	}
-	spin_unlock(&vcpu->kvm->arch.vgic.lock);
-	kvm_prepare_mmio(run, mmio);
+	spin_unlock(&dist->lock);
+	run->mmio.is_write	= is_write;
+	run->mmio.len		= len;
+	run->mmio.phys_addr	= addr;
+	memcpy(run->mmio.data, val, len);
+
 	kvm_handle_mmio_return(vcpu, run);
 
 	if (updated_state)
 		vgic_kick_vcpus(vcpu->kvm);
 
-	return true;
+	return 0;
 }
 
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
- * @vcpu:      pointer to the vcpu performing the access
- * @run:       pointer to the kvm_run structure
- * @mmio:      pointer to the data describing the access
- *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- * Calls the actual handling routine for the selected VGIC model.
- */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio)
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+				 struct kvm_io_device *this,
+				 gpa_t addr, int len, void *val)
 {
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return false;
+	return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
+}
 
-	/*
-	 * This will currently call either vgic_v2_handle_mmio() or
-	 * vgic_v3_handle_mmio(), which in turn will call
-	 * vgic_handle_mmio_range() defined above.
-	 */
-	return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+				  struct kvm_io_device *this,
+				  gpa_t addr, int len, const void *val)
+{
+	return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+				       true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+	.read	= vgic_handle_mmio_read,
+	.write	= vgic_handle_mmio_write,
+};
+
+/**
+ * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
+ * @kvm:            The VM structure pointer
+ * @base:           The (guest) base address for the register frame
+ * @len:            Length of the register frame window
+ * @ranges:         Describing the handler functions for each register
+ * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
+ * @iodev:          Points to memory to be passed on to the handler
+ *
+ * @iodev stores the parameters of this function to be usable by the handler
+ * respectively the dispatcher function (since the KVM I/O bus framework lacks
+ * an opaque parameter). Initialization is done in this function, but the
+ * reference should be valid and unique for the whole VGIC lifetime.
+ * If the register frame is not mapped for a specific VCPU, pass -1 to
+ * @redist_vcpu_id.
+ */
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+			     const struct vgic_io_range *ranges,
+			     int redist_vcpu_id,
+			     struct vgic_io_device *iodev)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int ret;
+
+	if (redist_vcpu_id >= 0)
+		vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
+
+	iodev->addr		= base;
+	iodev->len		= len;
+	iodev->reg_ranges	= ranges;
+	iodev->redist_vcpu	= vcpu;
+
+	kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
+
+	mutex_lock(&kvm->slots_lock);
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
+				      &iodev->dev);
+	mutex_unlock(&kvm->slots_lock);
+
+	/* Mark the iodev as invalid if registration fails. */
+	if (ret)
+		iodev->dev.ops = NULL;
+
+	return ret;
 }
 
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
@@ -804,6 +917,36 @@
 	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
 }
 
+static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	unsigned long *active, *enabled, *act_percpu, *act_shared;
+	unsigned long active_private, active_shared;
+	int nr_shared = vgic_nr_shared_irqs(dist);
+	int vcpu_id;
+
+	vcpu_id = vcpu->vcpu_id;
+	act_percpu = vcpu->arch.vgic_cpu.active_percpu;
+	act_shared = vcpu->arch.vgic_cpu.active_shared;
+
+	active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
+	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+	bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
+
+	active = vgic_bitmap_get_shared_map(&dist->irq_active);
+	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+	bitmap_and(act_shared, active, enabled, nr_shared);
+	bitmap_and(act_shared, act_shared,
+		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+		   nr_shared);
+
+	active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
+	active_shared = find_first_bit(act_shared, nr_shared);
+
+	return (active_private < VGIC_NR_PRIVATE_IRQS ||
+		active_shared < nr_shared);
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -835,7 +978,7 @@
 
 /*
  * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
+ * or active interrupts. Must be called with distributor lock held.
  */
 void vgic_update_state(struct kvm *kvm)
 {
@@ -849,10 +992,13 @@
 	}
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (compute_pending_for_cpu(vcpu)) {
-			pr_debug("CPU%d has pending interrupts\n", c);
+		if (compute_pending_for_cpu(vcpu))
 			set_bit(c, dist->irq_pending_on_cpu);
-		}
+
+		if (compute_active_for_cpu(vcpu))
+			set_bit(c, dist->irq_active_on_cpu);
+		else
+			clear_bit(c, dist->irq_active_on_cpu);
 	}
 }
 
@@ -955,6 +1101,26 @@
 	}
 }
 
+static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
+				 int lr_nr, struct vgic_lr vlr)
+{
+	if (vgic_irq_is_active(vcpu, irq)) {
+		vlr.state |= LR_STATE_ACTIVE;
+		kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
+		vgic_irq_clear_active(vcpu, irq);
+		vgic_update_state(vcpu->kvm);
+	} else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+		vlr.state |= LR_STATE_PENDING;
+		kvm_debug("Set pending: 0x%x\n", vlr.state);
+	}
+
+	if (!vgic_irq_is_edge(vcpu, irq))
+		vlr.state |= LR_EOI_INT;
+
+	vgic_set_lr(vcpu, lr_nr, vlr);
+	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
+}
+
 /*
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * or false if it wasn't possible to queue it.
@@ -982,9 +1148,7 @@
 		if (vlr.source == sgi_source_id) {
 			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
 			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-			vlr.state |= LR_STATE_PENDING;
-			vgic_set_lr(vcpu, lr, vlr);
-			vgic_sync_lr_elrsr(vcpu, lr, vlr);
+			vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 			return true;
 		}
 	}
@@ -1001,12 +1165,8 @@
 
 	vlr.irq = irq;
 	vlr.source = sgi_source_id;
-	vlr.state = LR_STATE_PENDING;
-	if (!vgic_irq_is_edge(vcpu, irq))
-		vlr.state |= LR_EOI_INT;
-
-	vgic_set_lr(vcpu, lr, vlr);
-	vgic_sync_lr_elrsr(vcpu, lr, vlr);
+	vlr.state = 0;
+	vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 
 	return true;
 }
@@ -1038,39 +1198,49 @@
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	unsigned long *pa_percpu, *pa_shared;
 	int i, vcpu_id;
 	int overflow = 0;
+	int nr_shared = vgic_nr_shared_irqs(dist);
 
 	vcpu_id = vcpu->vcpu_id;
 
+	pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
+	pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
+
+	bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
+		  VGIC_NR_PRIVATE_IRQS);
+	bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
+		  nr_shared);
 	/*
 	 * We may not have any pending interrupt, or the interrupts
 	 * may have been serviced from another vcpu. In all cases,
 	 * move along.
 	 */
-	if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
-		pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
 		goto epilog;
-	}
 
 	/* SGIs */
-	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+	for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
 		if (!queue_sgi(vcpu, i))
 			overflow = 1;
 	}
 
 	/* PPIs */
-	for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+	for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
 		if (!vgic_queue_hwirq(vcpu, i))
 			overflow = 1;
 	}
 
 	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+	for_each_set_bit(i, pa_shared, nr_shared) {
 		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
 			overflow = 1;
 	}
 
+
+
+
 epilog:
 	if (overflow) {
 		vgic_enable_underflow(vcpu);
@@ -1089,7 +1259,9 @@
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	u32 status = vgic_get_interrupt_status(vcpu);
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	bool level_pending = false;
+	struct kvm *kvm = vcpu->kvm;
 
 	kvm_debug("STATUS = %08x\n", status);
 
@@ -1106,6 +1278,7 @@
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
+			spin_lock(&dist->lock);
 			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
@@ -1124,6 +1297,17 @@
 			 */
 			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
+			/*
+			 * kvm_notify_acked_irq calls kvm_set_irq()
+			 * to reset the IRQ level. Need to release the
+			 * lock for kvm_set_irq to grab it.
+			 */
+			spin_unlock(&dist->lock);
+
+			kvm_notify_acked_irq(kvm, 0,
+					     vlr.irq - VGIC_NR_PRIVATE_IRQS);
+			spin_lock(&dist->lock);
+
 			/* Any additional pending interrupt? */
 			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
 				vgic_cpu_irq_set(vcpu, vlr.irq);
@@ -1133,6 +1317,8 @@
 				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
+			spin_unlock(&dist->lock);
+
 			/*
 			 * Despite being EOIed, the LR may not have
 			 * been marked as empty.
@@ -1155,10 +1341,7 @@
 	return level_pending;
 }
 
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
+/* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1205,14 +1388,10 @@
 
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return;
 
-	spin_lock(&dist->lock);
 	__kvm_vgic_sync_hwstate(vcpu);
-	spin_unlock(&dist->lock);
 }
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1225,6 +1404,17 @@
 	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 0;
+
+	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+
 void vgic_kick_vcpus(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
@@ -1397,8 +1587,12 @@
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
 	kfree(vgic_cpu->pending_shared);
+	kfree(vgic_cpu->active_shared);
+	kfree(vgic_cpu->pend_act_shared);
 	kfree(vgic_cpu->vgic_irq_lr_map);
 	vgic_cpu->pending_shared = NULL;
+	vgic_cpu->active_shared = NULL;
+	vgic_cpu->pend_act_shared = NULL;
 	vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
@@ -1408,9 +1602,14 @@
 
 	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
+	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
-	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+	if (!vgic_cpu->pending_shared
+		|| !vgic_cpu->active_shared
+		|| !vgic_cpu->pend_act_shared
+		|| !vgic_cpu->vgic_irq_lr_map) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
@@ -1463,10 +1662,12 @@
 	kfree(dist->irq_spi_mpidr);
 	kfree(dist->irq_spi_target);
 	kfree(dist->irq_pending_on_cpu);
+	kfree(dist->irq_active_on_cpu);
 	dist->irq_sgi_sources = NULL;
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
 	dist->irq_pending_on_cpu = NULL;
+	dist->irq_active_on_cpu = NULL;
 	dist->nr_cpus = 0;
 }
 
@@ -1502,6 +1703,7 @@
 	ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
 	ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
 
@@ -1514,10 +1716,13 @@
 				       GFP_KERNEL);
 	dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
 					   GFP_KERNEL);
+	dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+					   GFP_KERNEL);
 	if (!dist->irq_sgi_sources ||
 	    !dist->irq_spi_cpu ||
 	    !dist->irq_spi_target ||
-	    !dist->irq_pending_on_cpu) {
+	    !dist->irq_pending_on_cpu ||
+	    !dist->irq_active_on_cpu) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1845,12 +2050,9 @@
 	return r;
 }
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
 {
-	struct kvm_exit_mmio dev_attr_mmio;
-
-	dev_attr_mmio.len = 4;
-	if (vgic_find_range(ranges, &dev_attr_mmio, offset))
+	if (vgic_find_range(ranges, 4, offset))
 		return 0;
 	else
 		return -ENXIO;
@@ -1883,8 +2085,10 @@
 };
 
 static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+	{ .compatible = "arm,cortex-a15-gic",	.data = vgic_v2_probe, },
+	{ .compatible = "arm,cortex-a7-gic",	.data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-400",		.data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3",		.data = vgic_v3_probe, },
 	{},
 };
 
@@ -1932,3 +2136,38 @@
 	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
 	return ret;
 }
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries,
+		    int gsi)
+{
+	return gsi;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	return pin;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
+		u32 irq, int level, bool line_status)
+{
+	unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	BUG_ON(!vgic_initialized(kvm));
+
+	if (spi > kvm->arch.vgic.nr_irqs)
+		return -EINVAL;
+	return kvm_vgic_inject_irq(kvm, 0, spi, level);
+
+}
+
+/* MSI not implemented yet */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id,
+		int level, bool line_status)
+{
+	return 0;
+}
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index 1e83bdf..0df74cb 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -20,6 +20,8 @@
 #ifndef __KVM_VGIC_H__
 #define __KVM_VGIC_H__
 
+#include <kvm/iodev.h>
+
 #define VGIC_ADDR_UNDEF		(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
@@ -57,6 +59,14 @@
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
 
+struct kvm_exit_mmio {
+	phys_addr_t	phys_addr;
+	void		*data;
+	u32		len;
+	bool		is_write;
+	void		*private;
+};
+
 void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
 		     phys_addr_t offset, int mode);
 bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
@@ -74,7 +84,7 @@
 	*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
 }
 
-struct kvm_mmio_range {
+struct vgic_io_range {
 	phys_addr_t base;
 	unsigned long len;
 	int bits_per_irq;
@@ -82,6 +92,11 @@
 			    phys_addr_t offset);
 };
 
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+			     const struct vgic_io_range *ranges,
+			     int redist_id,
+			     struct vgic_io_device *iodev);
+
 static inline bool is_in_range(phys_addr_t addr, unsigned long len,
 			       phys_addr_t baseaddr, unsigned long size)
 {
@@ -89,14 +104,8 @@
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t offset);
-
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-			    struct kvm_exit_mmio *mmio,
-			    const struct kvm_mmio_range *ranges,
-			    unsigned long mmio_base);
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+				      int len, gpa_t offset);
 
 bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
 			    phys_addr_t offset, int vcpu_id, int access);
@@ -107,12 +116,20 @@
 bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
 				   phys_addr_t offset, int vcpu_id);
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+				struct kvm_exit_mmio *mmio,
+				phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset, int vcpu_id);
+
 bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 			 phys_addr_t offset);
 
 void vgic_kick_vcpus(struct kvm *kvm);
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset);
 int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 00d8642..571c1ce 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -8,7 +8,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
@@ -60,8 +60,9 @@
 	return 1;
 }
 
-static int coalesced_mmio_write(struct kvm_io_device *this,
-				gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
+				struct kvm_io_device *this, gpa_t addr,
+				int len, const void *val)
 {
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 148b239..9ff4193 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,7 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
 /*
@@ -311,6 +311,9 @@
 	unsigned int events;
 	int idx;
 
+	if (!kvm_arch_intc_initialized(kvm))
+		return -EAGAIN;
+
 	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
 	if (!irqfd)
 		return -ENOMEM;
@@ -712,8 +715,8 @@
 
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
-ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
-		const void *val)
+ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
+		int len, const void *val)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
 
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 7f256f3..1d56a90 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -105,7 +105,7 @@
 	i = kvm_irq_map_gsi(kvm, irq_set, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
-	while(i--) {
+	while (i--) {
 		int r;
 		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
 				   line_status);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cc6a25d..d3fc939 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -16,7 +16,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
@@ -66,13 +66,13 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-unsigned int halt_poll_ns = 0;
+static unsigned int halt_poll_ns;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
 
 /*
  * Ordering of locks:
  *
- * 		kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+ *	kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
 DEFINE_SPINLOCK(kvm_lock);
@@ -80,7 +80,7 @@
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
-static int kvm_usage_count = 0;
+static int kvm_usage_count;
 static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
@@ -539,20 +539,12 @@
 		return kzalloc(size, GFP_KERNEL);
 }
 
-void kvm_kvfree(const void *addr)
-{
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
-}
-
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
 	if (!memslot->dirty_bitmap)
 		return;
 
-	kvm_kvfree(memslot->dirty_bitmap);
+	kvfree(memslot->dirty_bitmap);
 	memslot->dirty_bitmap = NULL;
 }
 
@@ -888,8 +880,8 @@
 		 * or moved, memslot will be created.
 		 *
 		 * validation of sp->gfn happens in:
-		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
-		 * 	- kvm_is_visible_gfn (mmu_check_roots)
+		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 *	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
 
@@ -1061,9 +1053,11 @@
 		mask = xchg(&dirty_bitmap[i], 0);
 		dirty_bitmap_buffer[i] = mask;
 
-		offset = i * BITS_PER_LONG;
-		kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
-								mask);
+		if (mask) {
+			offset = i * BITS_PER_LONG;
+			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+								offset, mask);
+		}
 	}
 
 	spin_unlock(&kvm->mmu_lock);
@@ -1193,16 +1187,6 @@
 	return gfn_to_hva_memslot_prot(slot, gfn, writable);
 }
 
-static int kvm_read_hva(void *data, void __user *hva, int len)
-{
-	return __copy_from_user(data, hva, len);
-}
-
-static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
-{
-	return __copy_from_user_inatomic(data, hva, len);
-}
-
 static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int write, struct page **page)
 {
@@ -1481,7 +1465,6 @@
 
 	return kvm_pfn_to_page(pfn);
 }
-
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
@@ -1517,6 +1500,7 @@
 {
 	if (!kvm_is_reserved_pfn(pfn)) {
 		struct page *page = pfn_to_page(pfn);
+
 		if (!PageReserved(page))
 			SetPageDirty(page);
 	}
@@ -1554,7 +1538,7 @@
 	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = kvm_read_hva(data, (void __user *)addr + offset, len);
+	r = __copy_from_user(data, (void __user *)addr + offset, len);
 	if (r)
 		return -EFAULT;
 	return 0;
@@ -1593,7 +1577,7 @@
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	pagefault_disable();
-	r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
+	r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
 	pagefault_enable();
 	if (r)
 		return -EFAULT;
@@ -1653,8 +1637,8 @@
 	ghc->generation = slots->generation;
 	ghc->len = len;
 	ghc->memslot = gfn_to_memslot(kvm, start_gfn);
-	ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
-	if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
+	ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
+	if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
 		ghc->hva += offset;
 	} else {
 		/*
@@ -1742,7 +1726,7 @@
 	int offset = offset_in_page(gpa);
 	int ret;
 
-        while ((seg = next_segment(len, offset)) != 0) {
+	while ((seg = next_segment(len, offset)) != 0) {
 		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
 		if (ret < 0)
 			return ret;
@@ -1800,6 +1784,7 @@
 	start = cur = ktime_get();
 	if (halt_poll_ns) {
 		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+
 		do {
 			/*
 			 * This sets KVM_REQ_UNHALT if an interrupt
@@ -2118,7 +2103,7 @@
 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
 	 * so vcpu_load() would break it.
 	 */
-	if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+	if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
 		return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 #endif
 
@@ -2135,6 +2120,7 @@
 			/* The thread running this VCPU changed. */
 			struct pid *oldpid = vcpu->pid;
 			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+
 			rcu_assign_pointer(vcpu->pid, newpid);
 			if (oldpid)
 				synchronize_rcu();
@@ -2205,7 +2191,7 @@
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &mp_state, sizeof mp_state))
+		if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
 			goto out;
 		r = 0;
 		break;
@@ -2214,7 +2200,7 @@
 		struct kvm_mp_state mp_state;
 
 		r = -EFAULT;
-		if (copy_from_user(&mp_state, argp, sizeof mp_state))
+		if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
 			goto out;
 		r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
 		break;
@@ -2223,13 +2209,13 @@
 		struct kvm_translation tr;
 
 		r = -EFAULT;
-		if (copy_from_user(&tr, argp, sizeof tr))
+		if (copy_from_user(&tr, argp, sizeof(tr)))
 			goto out;
 		r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &tr, sizeof tr))
+		if (copy_to_user(argp, &tr, sizeof(tr)))
 			goto out;
 		r = 0;
 		break;
@@ -2238,7 +2224,7 @@
 		struct kvm_guest_debug dbg;
 
 		r = -EFAULT;
-		if (copy_from_user(&dbg, argp, sizeof dbg))
+		if (copy_from_user(&dbg, argp, sizeof(dbg)))
 			goto out;
 		r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
 		break;
@@ -2252,14 +2238,14 @@
 		if (argp) {
 			r = -EFAULT;
 			if (copy_from_user(&kvm_sigmask, argp,
-					   sizeof kvm_sigmask))
+					   sizeof(kvm_sigmask)))
 				goto out;
 			r = -EINVAL;
-			if (kvm_sigmask.len != sizeof sigset)
+			if (kvm_sigmask.len != sizeof(sigset))
 				goto out;
 			r = -EFAULT;
 			if (copy_from_user(&sigset, sigmask_arg->sigset,
-					   sizeof sigset))
+					   sizeof(sigset)))
 				goto out;
 			p = &sigset;
 		}
@@ -2321,14 +2307,14 @@
 		if (argp) {
 			r = -EFAULT;
 			if (copy_from_user(&kvm_sigmask, argp,
-					   sizeof kvm_sigmask))
+					   sizeof(kvm_sigmask)))
 				goto out;
 			r = -EINVAL;
-			if (kvm_sigmask.len != sizeof csigset)
+			if (kvm_sigmask.len != sizeof(csigset))
 				goto out;
 			r = -EFAULT;
 			if (copy_from_user(&csigset, sigmask_arg->sigset,
-					   sizeof csigset))
+					   sizeof(csigset)))
 				goto out;
 			sigset_from_compat(&sigset, &csigset);
 			r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2525,7 +2511,7 @@
 
 		r = -EFAULT;
 		if (copy_from_user(&kvm_userspace_mem, argp,
-						sizeof kvm_userspace_mem))
+						sizeof(kvm_userspace_mem)))
 			goto out;
 
 		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2535,7 +2521,7 @@
 		struct kvm_dirty_log log;
 
 		r = -EFAULT;
-		if (copy_from_user(&log, argp, sizeof log))
+		if (copy_from_user(&log, argp, sizeof(log)))
 			goto out;
 		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
 		break;
@@ -2543,16 +2529,18 @@
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	case KVM_REGISTER_COALESCED_MMIO: {
 		struct kvm_coalesced_mmio_zone zone;
+
 		r = -EFAULT;
-		if (copy_from_user(&zone, argp, sizeof zone))
+		if (copy_from_user(&zone, argp, sizeof(zone)))
 			goto out;
 		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
 		break;
 	}
 	case KVM_UNREGISTER_COALESCED_MMIO: {
 		struct kvm_coalesced_mmio_zone zone;
+
 		r = -EFAULT;
-		if (copy_from_user(&zone, argp, sizeof zone))
+		if (copy_from_user(&zone, argp, sizeof(zone)))
 			goto out;
 		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
 		break;
@@ -2562,7 +2550,7 @@
 		struct kvm_irqfd data;
 
 		r = -EFAULT;
-		if (copy_from_user(&data, argp, sizeof data))
+		if (copy_from_user(&data, argp, sizeof(data)))
 			goto out;
 		r = kvm_irqfd(kvm, &data);
 		break;
@@ -2571,7 +2559,7 @@
 		struct kvm_ioeventfd data;
 
 		r = -EFAULT;
-		if (copy_from_user(&data, argp, sizeof data))
+		if (copy_from_user(&data, argp, sizeof(data)))
 			goto out;
 		r = kvm_ioeventfd(kvm, &data);
 		break;
@@ -2592,7 +2580,7 @@
 		struct kvm_msi msi;
 
 		r = -EFAULT;
-		if (copy_from_user(&msi, argp, sizeof msi))
+		if (copy_from_user(&msi, argp, sizeof(msi)))
 			goto out;
 		r = kvm_send_userspace_msi(kvm, &msi);
 		break;
@@ -2604,7 +2592,7 @@
 		struct kvm_irq_level irq_event;
 
 		r = -EFAULT;
-		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+		if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
 			goto out;
 
 		r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2614,7 +2602,7 @@
 
 		r = -EFAULT;
 		if (ioctl == KVM_IRQ_LINE_STATUS) {
-			if (copy_to_user(argp, &irq_event, sizeof irq_event))
+			if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
 				goto out;
 		}
 
@@ -2647,7 +2635,7 @@
 			goto out_free_irq_routing;
 		r = kvm_set_irq_routing(kvm, entries, routing.nr,
 					routing.flags);
-	out_free_irq_routing:
+out_free_irq_routing:
 		vfree(entries);
 		break;
 	}
@@ -2822,8 +2810,7 @@
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
 		atomic_inc(&hardware_enable_failed);
-		printk(KERN_INFO "kvm: enabling virtualization on "
-				 "CPU%d failed\n", cpu);
+		pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
 	}
 }
 
@@ -2899,12 +2886,12 @@
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
-		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+		pr_info("kvm: disabling virtualization on CPU%d\n",
 		       cpu);
 		hardware_disable();
 		break;
 	case CPU_STARTING:
-		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+		pr_info("kvm: enabling virtualization on CPU%d\n",
 		       cpu);
 		hardware_enable();
 		break;
@@ -2921,7 +2908,7 @@
 	 *
 	 * And Intel TXT required VMX off for all cpu when system shutdown.
 	 */
-	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+	pr_info("kvm: exiting hardware virtualization\n");
 	kvm_rebooting = true;
 	on_each_cpu(hardware_disable_nolock, NULL, 1);
 	return NOTIFY_OK;
@@ -2945,7 +2932,7 @@
 }
 
 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
-                                 const struct kvm_io_range *r2)
+				 const struct kvm_io_range *r2)
 {
 	if (r1->addr < r2->addr)
 		return -1;
@@ -2998,7 +2985,7 @@
 	return off;
 }
 
-static int __kvm_io_bus_write(struct kvm_io_bus *bus,
+static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
 			      struct kvm_io_range *range, const void *val)
 {
 	int idx;
@@ -3009,7 +2996,7 @@
 
 	while (idx < bus->dev_count &&
 		kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-		if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
+		if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
 					range->len, val))
 			return idx;
 		idx++;
@@ -3019,7 +3006,7 @@
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	struct kvm_io_bus *bus;
@@ -3031,14 +3018,14 @@
 		.len = len,
 	};
 
-	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	r = __kvm_io_bus_write(bus, &range, val);
+	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	r = __kvm_io_bus_write(vcpu, bus, &range, val);
 	return r < 0 ? r : 0;
 }
 
 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-			    int len, const void *val, long cookie)
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+			    gpa_t addr, int len, const void *val, long cookie)
 {
 	struct kvm_io_bus *bus;
 	struct kvm_io_range range;
@@ -3048,12 +3035,12 @@
 		.len = len,
 	};
 
-	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
 
 	/* First try the device referenced by cookie. */
 	if ((cookie >= 0) && (cookie < bus->dev_count) &&
 	    (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
-		if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
+		if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
 					val))
 			return cookie;
 
@@ -3061,11 +3048,11 @@
 	 * cookie contained garbage; fall back to search and return the
 	 * correct cookie value.
 	 */
-	return __kvm_io_bus_write(bus, &range, val);
+	return __kvm_io_bus_write(vcpu, bus, &range, val);
 }
 
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
-			     void *val)
+static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
+			     struct kvm_io_range *range, void *val)
 {
 	int idx;
 
@@ -3075,7 +3062,7 @@
 
 	while (idx < bus->dev_count &&
 		kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-		if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
+		if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
 				       range->len, val))
 			return idx;
 		idx++;
@@ -3086,7 +3073,7 @@
 EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val)
 {
 	struct kvm_io_bus *bus;
@@ -3098,8 +3085,8 @@
 		.len = len,
 	};
 
-	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	r = __kvm_io_bus_read(bus, &range, val);
+	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	r = __kvm_io_bus_read(vcpu, bus, &range, val);
 	return r < 0 ? r : 0;
 }
 
@@ -3269,6 +3256,7 @@
 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
 	if (vcpu->preempted)
 		vcpu->preempted = false;
 
@@ -3350,7 +3338,7 @@
 
 	r = misc_register(&kvm_dev);
 	if (r) {
-		printk(KERN_ERR "kvm: misc device register failed\n");
+		pr_err("kvm: misc device register failed\n");
 		goto out_unreg;
 	}
 
@@ -3361,7 +3349,7 @@
 
 	r = kvm_init_debug();
 	if (r) {
-		printk(KERN_ERR "kvm: create debugfs files failed\n");
+		pr_err("kvm: create debugfs files failed\n");
 		goto out_undebugfs;
 	}