Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - most of the rest of MM

 - a small number of misc things

 - lib/ updates

 - checkpatch

 - autofs updates

 - ipc/ updates

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (126 commits)
  ipc: optimize semget/shmget/msgget for lots of keys
  ipc/sem: play nicer with large nsops allocations
  ipc/sem: drop sem_checkid helper
  ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t
  ipc: convert sem_undo_list.refcnt from atomic_t to refcount_t
  ipc: convert ipc_namespace.count from atomic_t to refcount_t
  kcov: support compat processes
  sh: defconfig: cleanup from old Kconfig options
  mn10300: defconfig: cleanup from old Kconfig options
  m32r: defconfig: cleanup from old Kconfig options
  drivers/pps: use surrounding "if PPS" to remove numerous dependency checks
  drivers/pps: aesthetic tweaks to PPS-related content
  cpumask: make cpumask_next() out-of-line
  kmod: move #ifdef CONFIG_MODULES wrapper to Makefile
  kmod: split off umh headers into its own file
  MAINTAINERS: clarify kmod is just a kernel module loader
  kmod: split out umh code into its own file
  test_kmod: flip INT checks to be consistent
  test_kmod: remove paranoid UINT_MAX check on uint range processing
  vfat: deduplicate hex2bin()
  ...
diff --git a/CREDITS b/CREDITS
index 5d09c26..0d2d60d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2090,7 +2090,7 @@
 
 N: Mohit Kumar
 D: ST Microelectronics SPEAr13xx PCI host bridge driver
-D: Synopsys Designware PCI host bridge driver
+D: Synopsys DesignWare PCI host bridge driver
 
 N: Gabor Kuti
 E: seasons@falcon.sch.bme.hu
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 86b0e8e..0549662 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2764,6 +2764,15 @@
 			If the dependencies are under your control, you can
 			turn on cpu0_hotplug.
 
+	nps_mtm_hs_ctr= [KNL,ARC]
+			This parameter sets the maximum duration, in
+			cycles, each HW thread of the CTOP can run
+			without interruptions, before HW switches it.
+			The actual maximum duration is 16 times this
+			parameter's value.
+			Format: integer between 1 and 255
+			Default: 255
+
 	nptcg=		[IA-64] Override max number of concurrent global TLB
 			purges which is reported from either PAL_VM_SUMMARY or
 			SAL PALO.
diff --git a/Documentation/devicetree/bindings/arc/hsdk.txt b/Documentation/devicetree/bindings/arc/hsdk.txt
new file mode 100644
index 0000000..be50654
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/hsdk.txt
@@ -0,0 +1,7 @@
+Synopsys DesignWare ARC HS Development Kit Device Tree Bindings
+---------------------------------------------------------------------------
+
+ARC HSDK Board with quad-core ARC HS38x4 in silicon.
+
+Required root node properties:
+    - compatible = "snps,hsdk";
diff --git a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
index 35a4653..b9165b7 100644
--- a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
+++ b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
@@ -1,11 +1,11 @@
 * Freescale 83xx and 512x PCI bridges
 
-Freescale 83xx and 512x SOCs include the same pci bridge core.
+Freescale 83xx and 512x SOCs include the same PCI bridge core.
 
 83xx/512x specific notes:
 - reg: should contain two address length tuples
-    The first is for the internal pci bridge registers
-    The second is for the pci config space access registers
+    The first is for the internal PCI bridge registers
+    The second is for the PCI config space access registers
 
 Example (MPC8313ERDB)
 	pci0: pci@e0008500 {
diff --git a/Documentation/devicetree/bindings/pci/altera-pcie.txt b/Documentation/devicetree/bindings/pci/altera-pcie.txt
index 2951a6a..4958801 100644
--- a/Documentation/devicetree/bindings/pci/altera-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/altera-pcie.txt
@@ -7,21 +7,21 @@
 		"Txs": TX slave port region
 		"Cra": Control register access region
 - interrupt-parent:	interrupt source phandle.
-- interrupts:	specifies the interrupt source of the parent interrupt controller.
-		The format of the interrupt specifier depends on the parent interrupt
-		controller.
+- interrupts:	specifies the interrupt source of the parent interrupt
+		controller.  The format of the interrupt specifier depends
+		on the parent interrupt controller.
 - device_type:	must be "pci"
 - #address-cells:	set to <3>
-- #size-cells:	set to <2>
+- #size-cells:		set to <2>
 - #interrupt-cells:	set to <1>
-- ranges:		describes the translation of addresses for root ports and standard
-		PCI regions.
+- ranges:	describes the translation of addresses for root ports and
+		standard PCI regions.
 - interrupt-map-mask and interrupt-map: standard PCI properties to define the
 		mapping of the PCIe interface to interrupt numbers.
 
 Optional properties:
-- msi-parent:	Link to the hardware entity that serves as the MSI controller for this PCIe
-		controller.
+- msi-parent:	Link to the hardware entity that serves as the MSI controller
+		for this PCIe controller.
 - bus-range:	PCI bus numbers covered
 
 Example
@@ -45,5 +45,5 @@
 			            <0 0 0 3 &pcie_0 3>,
 			            <0 0 0 4 &pcie_0 4>;
 		ranges = <0x82000000 0x00000000 0x00000000 0xc0000000 0x00000000 0x10000000
-			    0x82000000 0x00000000 0x10000000 0xd0000000 0x00000000 0x10000000>;
+			  0x82000000 0x00000000 0x10000000 0xd0000000 0x00000000 0x10000000>;
 	};
diff --git a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt
index 5ecaea1..4e4aee4 100644
--- a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt
@@ -6,7 +6,7 @@
 Required properties:
 - compatible: "axis,artpec6-pcie", "snps,dw-pcie"
 - reg: base addresses and lengths of the PCIe controller (DBI),
-	the phy controller, and configuration address space.
+	the PHY controller, and configuration address space.
 - reg-names: Must include the following entries:
 	- "dbi"
 	- "phy"
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
index b2480dd..1da7ade 100644
--- a/Documentation/devicetree/bindings/pci/designware-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -1,4 +1,4 @@
-* Synopsys Designware PCIe interface
+* Synopsys DesignWare PCIe interface
 
 Required properties:
 - compatible: should contain "snps,dw-pcie" to identify the core.
@@ -17,29 +17,27 @@
 	properties to define the mapping of the PCIe interface to interrupt
 	numbers.
 EP mode:
-- num-ib-windows: number of inbound address translation
-        windows
-- num-ob-windows: number of outbound address translation
-        windows
+- num-ib-windows: number of inbound address translation windows
+- num-ob-windows: number of outbound address translation windows
 
 Optional properties:
 - num-lanes: number of lanes to use (this property should be specified unless
   the link is brought already up in BIOS)
-- reset-gpio: gpio pin number of power good signal
+- reset-gpio: GPIO pin number of power good signal
 - clocks: Must contain an entry for each entry in clock-names.
 	See ../clocks/clock-bindings.txt for details.
 - clock-names: Must include the following entries:
 	- "pcie"
 	- "pcie_bus"
 RC mode:
-- num-viewport: number of view ports configured in
-  hardware. If a platform does not specify it, the driver assumes 2.
-- bus-range: PCI bus numbers covered (it is recommended
-  for new devicetrees to specify this property, to keep backwards
-  compatibility a range of 0x00-0xff is assumed if not present)
+- num-viewport: number of view ports configured in hardware. If a platform
+  does not specify it, the driver assumes 2.
+- bus-range: PCI bus numbers covered (it is recommended for new devicetrees
+  to specify this property, to keep backwards compatibility a range of
+  0x00-0xff is assumed if not present)
+
 EP mode:
-- max-functions: maximum number of functions that can be
-  configured
+- max-functions: maximum number of functions that can be configured
 
 Example configuration:
 
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
index cf92d3b..7b1e48b 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -1,6 +1,6 @@
 * Freescale i.MX6 PCIe interface
 
-This PCIe host controller is based on the Synopsis Designware PCIe IP
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
 and thus inherits all the common properties defined in designware-pcie.txt.
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
index 7a579c8..bdb7ab3 100644
--- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@ -1,7 +1,7 @@
 HiSilicon Hip05 and Hip06 PCIe host bridge DT description
 
-HiSilicon PCIe host controller is based on Designware PCI core.
-It shares common functions with PCIe Designware core driver and inherits
+HiSilicon PCIe host controller is based on the Synopsys DesignWare PCI core.
+It shares common functions with the PCIe DesignWare core driver and inherits
 common properties defined in
 Documentation/devicetree/bindings/pci/designware-pci.txt.
 
diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
index 68ffa0f..6e217c6 100644
--- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt
@@ -1,8 +1,8 @@
 HiSilicon Kirin SoCs PCIe host DT description
 
-Kirin PCIe host controller is based on Designware PCI core.
-It shares common functions with PCIe Designware core driver
-and inherits common properties defined in
+Kirin PCIe host controller is based on the Synopsys DesignWare PCI core.
+It shares common functions with the PCIe DesignWare core driver and
+inherits common properties defined in
 Documentation/devicetree/bindings/pci/designware-pci.txt.
 
 Additional properties are described here:
@@ -16,7 +16,7 @@
   "apb": apb Ctrl register defined by Kirin;
   "phy": apb PHY register defined by Kirin;
   "config": PCIe configuration space registers.
-- reset-gpios: The gpio to generate PCIe perst assert and deassert signal.
+- reset-gpios: The GPIO to generate PCIe PERST# assert and deassert signal.
 
 Optional properties:
 
diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
index ee1c72d5..c0484da 100644
--- a/Documentation/devicetree/bindings/pci/layerscape-pci.txt
+++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
@@ -15,8 +15,10 @@
 - compatible: should contain the platform identifier such as:
         "fsl,ls1021a-pcie", "snps,dw-pcie"
         "fsl,ls2080a-pcie", "fsl,ls2085a-pcie", "snps,dw-pcie"
+        "fsl,ls2088a-pcie"
+        "fsl,ls1088a-pcie"
         "fsl,ls1046a-pcie"
-- reg: base addresses and lengths of the PCIe controller
+- reg: base addresses and lengths of the PCIe controller register blocks.
 - interrupts: A list of interrupt outputs of the controller. Must contain an
   entry for each entry in the interrupt-names property.
 - interrupt-names: Must include the following entries:
diff --git a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
deleted file mode 100644
index fe80dda..0000000
--- a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-MediaTek Gen2 PCIe controller which is available on MT7623 series SoCs
-
-PCIe subsys supports single root complex (RC) with 3 Root Ports. Each root
-ports supports a Gen2 1-lane Link and has PIPE interface to PHY.
-
-Required properties:
-- compatible: Should contain "mediatek,mt7623-pcie".
-- device_type: Must be "pci"
-- reg: Base addresses and lengths of the PCIe controller.
-- #address-cells: Address representation for root ports (must be 3)
-- #size-cells: Size representation for root ports (must be 2)
-- #interrupt-cells: Size representation for interrupts (must be 1)
-- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
-  Please refer to the standard PCI bus binding document for a more detailed
-  explanation.
-- clocks: Must contain an entry for each entry in clock-names.
-  See ../clocks/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
-  - free_ck :for reference clock of PCIe subsys
-  - sys_ck0 :for clock of Port0
-  - sys_ck1 :for clock of Port1
-  - sys_ck2 :for clock of Port2
-- resets: Must contain an entry for each entry in reset-names.
-  See ../reset/reset.txt for details.
-- reset-names: Must include the following entries:
-  - pcie-rst0 :port0 reset
-  - pcie-rst1 :port1 reset
-  - pcie-rst2 :port2 reset
-- phys: List of PHY specifiers (used by generic PHY framework).
-- phy-names : Must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the
-  number of PHYs as specified in *phys* property.
-- power-domains: A phandle and power domain specifier pair to the power domain
-  which is responsible for collapsing and restoring power to the peripheral.
-- bus-range: Range of bus numbers associated with this controller.
-- ranges: Ranges for the PCI memory and I/O regions.
-
-In addition, the device tree node must have sub-nodes describing each
-PCIe port interface, having the following mandatory properties:
-
-Required properties:
-- device_type: Must be "pci"
-- reg: Only the first four bytes are used to refer to the correct bus number
-  and device number.
-- #address-cells: Must be 3
-- #size-cells: Must be 2
-- #interrupt-cells: Must be 1
-- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
-  Please refer to the standard PCI bus binding document for a more detailed
-  explanation.
-- ranges: Sub-ranges distributed from the PCIe controller node. An empty
-  property is sufficient.
-- num-lanes: Number of lanes to use for this port.
-
-Examples:
-
-	hifsys: syscon@1a000000 {
-		compatible = "mediatek,mt7623-hifsys",
-			     "mediatek,mt2701-hifsys",
-			     "syscon";
-		reg = <0 0x1a000000 0 0x1000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-	};
-
-	pcie: pcie-controller@1a140000 {
-		compatible = "mediatek,mt7623-pcie";
-		device_type = "pci";
-		reg = <0 0x1a140000 0 0x1000>, /* PCIe shared registers */
-		      <0 0x1a142000 0 0x1000>, /* Port0 registers */
-		      <0 0x1a143000 0 0x1000>, /* Port1 registers */
-		      <0 0x1a144000 0 0x1000>; /* Port2 registers */
-		#address-cells = <3>;
-		#size-cells = <2>;
-		#interrupt-cells = <1>;
-		interrupt-map-mask = <0xf800 0 0 0>;
-		interrupt-map = <0x0000 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>,
-				<0x0800 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>,
-				<0x1000 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
-			 <&hifsys CLK_HIFSYS_PCIE0>,
-			 <&hifsys CLK_HIFSYS_PCIE1>,
-			 <&hifsys CLK_HIFSYS_PCIE2>;
-		clock-names = "free_ck", "sys_ck0", "sys_ck1", "sys_ck2";
-		resets = <&hifsys MT2701_HIFSYS_PCIE0_RST>,
-			 <&hifsys MT2701_HIFSYS_PCIE1_RST>,
-			 <&hifsys MT2701_HIFSYS_PCIE2_RST>;
-		reset-names = "pcie-rst0", "pcie-rst1", "pcie-rst2";
-		phys = <&pcie0_phy>, <&pcie1_phy>, <&pcie2_phy>;
-		phy-names = "pcie-phy0", "pcie-phy1", "pcie-phy2";
-		power-domains = <&scpsys MT2701_POWER_DOMAIN_HIF>;
-		bus-range = <0x00 0xff>;
-		ranges = <0x81000000 0 0x1a160000 0 0x1a160000 0 0x00010000	/* I/O space */
-			  0x83000000 0 0x60000000 0 0x60000000 0 0x10000000>;	/* memory space */
-
-		pcie@0,0 {
-			device_type = "pci";
-			reg = <0x0000 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
-			#interrupt-cells = <1>;
-			interrupt-map-mask = <0 0 0 0>;
-			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>;
-			ranges;
-			num-lanes = <1>;
-		};
-
-		pcie@1,0 {
-			device_type = "pci";
-			reg = <0x0800 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
-			#interrupt-cells = <1>;
-			interrupt-map-mask = <0 0 0 0>;
-			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>;
-			ranges;
-			num-lanes = <1>;
-		};
-
-		pcie@2,0 {
-			device_type = "pci";
-			reg = <0x1000 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
-			#interrupt-cells = <1>;
-			interrupt-map-mask = <0 0 0 0>;
-			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>;
-			ranges;
-			num-lanes = <1>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie.txt b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt
new file mode 100644
index 0000000..3a6ce55
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt
@@ -0,0 +1,284 @@
+MediaTek Gen2 PCIe controller
+
+Required properties:
+- compatible: Should contain one of the following strings:
+	"mediatek,mt2701-pcie"
+	"mediatek,mt2712-pcie"
+	"mediatek,mt7622-pcie"
+	"mediatek,mt7623-pcie"
+- device_type: Must be "pci"
+- reg: Base addresses and lengths of the PCIe subsys and root ports.
+- reg-names: Names of the above areas to use during resource lookup.
+- #address-cells: Address representation for root ports (must be 3)
+- #size-cells: Size representation for root ports (must be 2)
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names:
+  Mandatory entries:
+   - sys_ckN :transaction layer and data link layer clock
+  Required entries for MT2701/MT7623:
+   - free_ck :for reference clock of PCIe subsys
+  Required entries for MT2712/MT7622:
+   - ahb_ckN :AHB slave interface operating clock for CSR access and RC
+	      initiated MMIO access
+  Required entries for MT7622:
+   - axi_ckN :application layer MMIO channel operating clock
+   - aux_ckN :pe2_mac_bridge and pe2_mac_core operating clock when
+	      pcie_mac_ck/pcie_pipe_ck is turned off
+   - obff_ckN :OBFF functional block operating clock
+   - pipe_ckN :LTSSM and PHY/MAC layer operating clock
+  where N starting from 0 to one less than the number of root ports.
+- phys: List of PHY specifiers (used by generic PHY framework).
+- phy-names : Must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the
+  number of PHYs as specified in *phys* property.
+- power-domains: A phandle and power domain specifier pair to the power domain
+  which is responsible for collapsing and restoring power to the peripheral.
+- bus-range: Range of bus numbers associated with this controller.
+- ranges: Ranges for the PCI memory and I/O regions.
+
+Required properties for MT7623/MT2701:
+- #interrupt-cells: Size representation for interrupts (must be 1)
+- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- resets: Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names: Must be "pcie-rst0", "pcie-rst1", "pcie-rstN".. based on the
+  number of root ports.
+
+Required properties for MT2712/MT7622:
+-interrupts: A list of interrupt outputs of the controller, must have one
+	     entry for each PCIe port
+
+In addition, the device tree node must have sub-nodes describing each
+PCIe port interface, having the following mandatory properties:
+
+Required properties:
+- device_type: Must be "pci"
+- reg: Only the first four bytes are used to refer to the correct bus number
+  and device number.
+- #address-cells: Must be 3
+- #size-cells: Must be 2
+- #interrupt-cells: Must be 1
+- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
+  Please refer to the standard PCI bus binding document for a more detailed
+  explanation.
+- ranges: Sub-ranges distributed from the PCIe controller node. An empty
+  property is sufficient.
+- num-lanes: Number of lanes to use for this port.
+
+Examples for MT7623:
+
+	hifsys: syscon@1a000000 {
+		compatible = "mediatek,mt7623-hifsys",
+			     "mediatek,mt2701-hifsys",
+			     "syscon";
+		reg = <0 0x1a000000 0 0x1000>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+	};
+
+	pcie: pcie-controller@1a140000 {
+		compatible = "mediatek,mt7623-pcie";
+		device_type = "pci";
+		reg = <0 0x1a140000 0 0x1000>, /* PCIe shared registers */
+		      <0 0x1a142000 0 0x1000>, /* Port0 registers */
+		      <0 0x1a143000 0 0x1000>, /* Port1 registers */
+		      <0 0x1a144000 0 0x1000>; /* Port2 registers */
+		reg-names = "subsys", "port0", "port1", "port2";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0xf800 0 0 0>;
+		interrupt-map = <0x0000 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>,
+				<0x0800 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>,
+				<0x1000 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
+			 <&hifsys CLK_HIFSYS_PCIE0>,
+			 <&hifsys CLK_HIFSYS_PCIE1>,
+			 <&hifsys CLK_HIFSYS_PCIE2>;
+		clock-names = "free_ck", "sys_ck0", "sys_ck1", "sys_ck2";
+		resets = <&hifsys MT2701_HIFSYS_PCIE0_RST>,
+			 <&hifsys MT2701_HIFSYS_PCIE1_RST>,
+			 <&hifsys MT2701_HIFSYS_PCIE2_RST>;
+		reset-names = "pcie-rst0", "pcie-rst1", "pcie-rst2";
+		phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>,
+		       <&pcie2_phy PHY_TYPE_PCIE>;
+		phy-names = "pcie-phy0", "pcie-phy1", "pcie-phy2";
+		power-domains = <&scpsys MT2701_POWER_DOMAIN_HIF>;
+		bus-range = <0x00 0xff>;
+		ranges = <0x81000000 0 0x1a160000 0 0x1a160000 0 0x00010000	/* I/O space */
+			  0x83000000 0 0x60000000 0 0x60000000 0 0x10000000>;	/* memory space */
+
+		pcie@0,0 {
+			device_type = "pci";
+			reg = <0x0000 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>;
+			ranges;
+			num-lanes = <1>;
+		};
+
+		pcie@1,0 {
+			device_type = "pci";
+			reg = <0x0800 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>;
+			ranges;
+			num-lanes = <1>;
+		};
+
+		pcie@2,0 {
+			device_type = "pci";
+			reg = <0x1000 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 0>;
+			interrupt-map = <0 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>;
+			ranges;
+			num-lanes = <1>;
+		};
+	};
+
+Examples for MT2712:
+	pcie: pcie@11700000 {
+		compatible = "mediatek,mt2712-pcie";
+		device_type = "pci";
+		reg = <0 0x11700000 0 0x1000>,
+		      <0 0x112ff000 0 0x1000>;
+		reg-names = "port0", "port1";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&topckgen CLK_TOP_PE2_MAC_P0_SEL>,
+			 <&topckgen CLK_TOP_PE2_MAC_P1_SEL>,
+			 <&pericfg CLK_PERI_PCIE0>,
+			 <&pericfg CLK_PERI_PCIE1>;
+		clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1";
+		phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>;
+		phy-names = "pcie-phy0", "pcie-phy1";
+		bus-range = <0x00 0xff>;
+		ranges = <0x82000000 0 0x20000000  0x0 0x20000000  0 0x10000000>;
+
+		pcie0: pcie@0,0 {
+			device_type = "pci";
+			reg = <0x0000 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			ranges;
+			num-lanes = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0 0 0 1 &pcie_intc0 0>,
+					<0 0 0 2 &pcie_intc0 1>,
+					<0 0 0 3 &pcie_intc0 2>,
+					<0 0 0 4 &pcie_intc0 3>;
+			pcie_intc0: interrupt-controller {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+			};
+		};
+
+		pcie1: pcie@1,0 {
+			device_type = "pci";
+			reg = <0x0800 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			ranges;
+			num-lanes = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0 0 0 1 &pcie_intc1 0>,
+					<0 0 0 2 &pcie_intc1 1>,
+					<0 0 0 3 &pcie_intc1 2>,
+					<0 0 0 4 &pcie_intc1 3>;
+			pcie_intc1: interrupt-controller {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+			};
+		};
+	};
+
+Examples for MT7622:
+	pcie: pcie@1a140000 {
+		compatible = "mediatek,mt7622-pcie";
+		device_type = "pci";
+		reg = <0 0x1a140000 0 0x1000>,
+		      <0 0x1a143000 0 0x1000>,
+		      <0 0x1a145000 0 0x1000>;
+		reg-names = "subsys", "port0", "port1";
+		#address-cells = <3>;
+		#size-cells = <2>;
+		interrupts = <GIC_SPI 228 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 229 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&pciesys CLK_PCIE_P0_MAC_EN>,
+			 <&pciesys CLK_PCIE_P1_MAC_EN>,
+			 <&pciesys CLK_PCIE_P0_AHB_EN>,
+			 <&pciesys CLK_PCIE_P1_AHB_EN>,
+			 <&pciesys CLK_PCIE_P0_AUX_EN>,
+			 <&pciesys CLK_PCIE_P1_AUX_EN>,
+			 <&pciesys CLK_PCIE_P0_AXI_EN>,
+			 <&pciesys CLK_PCIE_P1_AXI_EN>,
+			 <&pciesys CLK_PCIE_P0_OBFF_EN>,
+			 <&pciesys CLK_PCIE_P1_OBFF_EN>,
+			 <&pciesys CLK_PCIE_P0_PIPE_EN>,
+			 <&pciesys CLK_PCIE_P1_PIPE_EN>;
+		clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1",
+			      "aux_ck0", "aux_ck1", "axi_ck0", "axi_ck1",
+			      "obff_ck0", "obff_ck1", "pipe_ck0", "pipe_ck1";
+		phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>;
+		phy-names = "pcie-phy0", "pcie-phy1";
+		power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
+		bus-range = <0x00 0xff>;
+		ranges = <0x82000000 0 0x20000000  0x0 0x20000000  0 0x10000000>;
+
+		pcie0: pcie@0,0 {
+			device_type = "pci";
+			reg = <0x0000 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			ranges;
+			num-lanes = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0 0 0 1 &pcie_intc0 0>,
+					<0 0 0 2 &pcie_intc0 1>,
+					<0 0 0 3 &pcie_intc0 2>,
+					<0 0 0 4 &pcie_intc0 3>;
+			pcie_intc0: interrupt-controller {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+			};
+		};
+
+		pcie1: pcie@1,0 {
+			device_type = "pci";
+			reg = <0x0800 0 0 0 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			ranges;
+			num-lanes = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0 0 0 1 &pcie_intc1 0>,
+					<0 0 0 2 &pcie_intc1 1>,
+					<0 0 0 3 &pcie_intc1 2>,
+					<0 0 0 4 &pcie_intc1 3>;
+			pcie_intc1: interrupt-controller {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/pci/mvebu-pci.txt b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
index 9c7fce69..127ae1f 100644
--- a/Documentation/devicetree/bindings/pci/mvebu-pci.txt
+++ b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
@@ -77,7 +77,7 @@
 - marvell,pcie-lane: the physical PCIe lane number, for ports having
   multiple lanes. If this property is not found, we assume that the
   value is 0.
-- reset-gpios: optional gpio to PERST#
+- reset-gpios: optional GPIO to PERST#
 - reset-delay-us: delay in us to wait after reset de-assertion, if not
   specified will default to 100ms, as required by the PCIe specification.
 
diff --git a/Documentation/devicetree/bindings/pci/pci-armada8k.txt b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
index 6b115fd..c1e4c3d 100644
--- a/Documentation/devicetree/bindings/pci/pci-armada8k.txt
+++ b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
@@ -1,6 +1,6 @@
 * Marvell Armada 7K/8K PCIe interface
 
-This PCIe host controller is based on the Synopsis Designware PCIe IP
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
 and thus inherits all the common properties defined in designware-pcie.txt.
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt
index d08a4d5..7e05487 100644
--- a/Documentation/devicetree/bindings/pci/pci-keystone.txt
+++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt
@@ -1,12 +1,12 @@
 TI Keystone PCIe interface
 
-Keystone PCI host Controller is based on Designware PCI h/w version 3.65.
-It shares common functions with PCIe Designware core driver and inherit
-common properties defined in
+Keystone PCI host Controller is based on the Synopsys DesignWare PCI
+hardware version 3.65.  It shares common functions with the PCIe DesignWare
+core driver and inherits common properties defined in
 Documentation/devicetree/bindings/pci/designware-pci.txt
 
 Please refer to Documentation/devicetree/bindings/pci/designware-pci.txt
-for the details of Designware DT bindings.  Additional properties are
+for the details of DesignWare DT bindings.  Additional properties are
 described here as well as properties that are not applicable.
 
 Required Properties:-
@@ -52,13 +52,12 @@
 	};
 
 Optional properties:-
-	phys: phandle to Generic Keystone SerDes phy for PCI
-	phy-names: name of the Generic Keystine SerDes phy for PCI
+	phys: phandle to generic Keystone SerDes PHY for PCI
+	phy-names: name of the generic Keystone SerDes PHY for PCI
 	  - If boot loader already does PCI link establishment, then phys and
 	    phy-names shouldn't be present.
 	interrupts: platform interrupt for error interrupts.
 
-Designware DT Properties not applicable for Keystone PCI
+DesignWare DT Properties not applicable for Keystone PCI
 
 1. pcie_bus clock-names not used.  Instead, a phandle to phys is used.
-
diff --git a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
index 07a7509..3d03863 100644
--- a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
+++ b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
@@ -6,11 +6,14 @@
 OHCI and EHCI controllers.
 
 Required properties:
-- compatible: "renesas,pci-r8a7790" for the R8A7790 SoC;
+- compatible: "renesas,pci-r8a7743" for the R8A7743 SoC;
+	      "renesas,pci-r8a7745" for the R8A7745 SoC;
+	      "renesas,pci-r8a7790" for the R8A7790 SoC;
 	      "renesas,pci-r8a7791" for the R8A7791 SoC;
 	      "renesas,pci-r8a7793" for the R8A7793 SoC;
 	      "renesas,pci-r8a7794" for the R8A7794 SoC;
-	      "renesas,pci-rcar-gen2" for a generic R-Car Gen2 compatible device
+	      "renesas,pci-rcar-gen2" for a generic R-Car Gen2 or
+				      RZ/G1 compatible device.
 
 
 	      When compatible with the generic version, nodes must list the
diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
index 9d418b7..3c9d321 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.txt
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
@@ -9,6 +9,7 @@
 			- "qcom,pcie-apq8084" for apq8084
 			- "qcom,pcie-msm8996" for msm8996 or apq8096
 			- "qcom,pcie-ipq4019" for ipq4019
+			- "qcom,pcie-ipq8074" for ipq8074
 
 - reg:
 	Usage: required
@@ -20,7 +21,7 @@
 	Value type: <stringlist>
 	Definition: Must include the following entries
 			- "parf"   Qualcomm specific registers
-			- "dbi"	   Designware PCIe registers
+			- "dbi"	   DesignWare PCIe registers
 			- "elbi"   External local bus interface registers
 			- "config" PCIe configuration space
 
@@ -105,6 +106,16 @@
 			- "bus_master"	Master AXI clock
 			- "bus_slave"	Slave AXI clock
 
+- clock-names:
+	Usage: required for ipq8074
+	Value type: <stringlist>
+	Definition: Should contain the following entries
+			- "iface"	PCIe to SysNOC BIU clock
+			- "axi_m"	AXI Master clock
+			- "axi_s"	AXI Slave clock
+			- "ahb"		AHB clock
+			- "aux"		Auxiliary clock
+
 - resets:
 	Usage: required
 	Value type: <prop-encoded-array>
@@ -144,6 +155,18 @@
 			- "ahb"			AHB reset
 			- "phy_ahb"		PHY AHB reset
 
+- reset-names:
+	Usage: required for ipq8074
+	Value type: <stringlist>
+	Definition: Should contain the following entries
+			- "pipe"		PIPE reset
+			- "sleep"		Sleep reset
+			- "sticky"		Core Sticky reset
+			- "axi_m"		AXI Master reset
+			- "axi_s"		AXI Slave reset
+			- "ahb"			AHB Reset
+			- "axi_m_sticky"	AXI Master Sticky reset
+
 - power-domains:
 	Usage: required for apq8084 and msm8996/apq8096
 	Value type: <prop-encoded-array>
@@ -180,7 +203,7 @@
 - <name>-gpios:
 	Usage: optional
 	Value type: <prop-encoded-array>
-	Definition: List of phandle and gpio specifier pairs. Should contain
+	Definition: List of phandle and GPIO specifier pairs. Should contain
 			- "perst-gpios"	PCIe endpoint reset signal line
 			- "wake-gpios"	PCIe endpoint wake signal line
 
diff --git a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
index 8e0a1eb..a04ab1b 100644
--- a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
+++ b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
@@ -71,7 +71,7 @@
    - interrupt-map: standard PCI properties to define the mapping of the
      PCI interface to interrupt numbers.
 
-   The PCI host bridge node migh have additional sub-nodes representing
+   The PCI host bridge node might have additional sub-nodes representing
    the onboard PCI devices/PCI slots. Each such sub-node must have the
    following mandatory properties:
 
diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
index a7e3dd4..76ba3a6 100644
--- a/Documentation/devicetree/bindings/pci/rcar-pci.txt
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -14,7 +14,7 @@
 	    SoC-specific version corresponding to the platform first
 	    followed by the generic version.
 
-- reg: base address and length of the pcie controller registers.
+- reg: base address and length of the PCIe controller registers.
 - #address-cells: set to <3>
 - #size-cells: set to <2>
 - bus-range: PCI bus numbers covered
@@ -25,15 +25,14 @@
 	source for hardware related interrupts (e.g. link speed change).
 - #interrupt-cells: set to <1>
 - interrupt-map-mask and interrupt-map: standard PCI properties
-	to define the mapping of the PCIe interface to interrupt
-	numbers.
+	to define the mapping of the PCIe interface to interrupt numbers.
 - clocks: from common clock binding: clock specifiers for the PCIe controller
 	and PCIe bus clocks.
 - clock-names: from common clock binding: should be "pcie" and "pcie_bus".
 
 Example:
 
-SoC specific DT Entry:
+SoC-specific DT Entry:
 
 	pcie: pcie@fe000000 {
 		compatible = "renesas,pcie-r8a7791", "renesas,pcie-rcar-gen2";
diff --git a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
index 1453a73..af34c65 100644
--- a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
@@ -19,8 +19,6 @@
 	- "pm"
 - msi-map: Maps a Requester ID to an MSI controller and associated
 	msi-specifier data. See ./pci-msi.txt
-- phys: From PHY bindings: Phandle for the Generic PHY for PCIe.
-- phy-names:  MUST be "pcie-phy".
 - interrupts: Three interrupt entries must be specified.
 - interrupt-names: Must include the following names
 	- "sys"
@@ -42,11 +40,24 @@
 	interrupt source. The value must be 1.
 - interrupt-map-mask and interrupt-map: standard PCI properties
 
+Required properties for legacy PHY model (deprecated):
+- phys: From PHY bindings: Phandle for the Generic PHY for PCIe.
+- phy-names:  MUST be "pcie-phy".
+
+Required properties for per-lane PHY model (preferred):
+- phys: Must contain an phandle to a PHY for each entry in phy-names.
+- phy-names: Must include 4 entries for all 4 lanes even if some of
+  them won't be used for your cases. Entries are of the form "pcie-phy-N":
+  where N ranges from 0 to 3.
+  (see example below and you MUST also refer to ../phy/rockchip-pcie-phy.txt
+  for changing the #phy-cells of phy node to support it)
+
 Optional Property:
 - aspm-no-l0s: RC won't support ASPM L0s. This property is needed if
 	using 24MHz OSC for RC's PHY.
-- ep-gpios: contain the entry for pre-reset gpio
+- ep-gpios: contain the entry for pre-reset GPIO
 - num-lanes: number of lanes to use
+- vpcie12v-supply: The phandle to the 12v regulator to use for PCIe.
 - vpcie3v3-supply: The phandle to the 3.3v regulator to use for PCIe.
 - vpcie1v8-supply: The phandle to the 1.8v regulator to use for PCIe.
 - vpcie0v9-supply: The phandle to the 0.9v regulator to use for PCIe.
@@ -95,6 +106,7 @@
 		 <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>;
 	reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
 		      "pm", "pclk", "aclk";
+	/* deprecated legacy PHY model */
 	phys = <&pcie_phy>;
 	phy-names = "pcie-phy";
 	pinctrl-names = "default";
@@ -111,3 +123,13 @@
 		#interrupt-cells = <1>;
 	};
 };
+
+pcie0: pcie@f8000000 {
+	...
+
+	/* preferred per-lane PHY model */
+	phys = <&pcie_phy 0>, <&pcie_phy 1>, <&pcie_phy 2>, <&pcie_phy 3>;
+	phy-names = "pcie-phy-0", "pcie-phy-1", "pcie-phy-2", "pcie-phy-3";
+
+	...
+};
diff --git a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
index 7d3b094..34a11bf 100644
--- a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
@@ -1,29 +1,29 @@
 * Samsung Exynos 5440 PCIe interface
 
-This PCIe host controller is based on the Synopsis Designware PCIe IP
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
 and thus inherits all the common properties defined in designware-pcie.txt.
 
 Required properties:
 - compatible: "samsung,exynos5440-pcie"
-- reg: base addresses and lengths of the pcie controller,
-	the phy controller, additional register for the phy controller.
-	(Registers for the phy controller are DEPRECATED.
+- reg: base addresses and lengths of the PCIe controller,
+	the PHY controller, additional register for the PHY controller.
+	(Registers for the PHY controller are DEPRECATED.
 	 Use the PHY framework.)
 - reg-names : First name should be set to "elbi".
-	And use the "config" instead of getting the confgiruation address space
+	And use the "config" instead of getting the configuration address space
 	from "ranges".
-	NOTE: When use the "config" property, reg-names must be set.
+	NOTE: When using the "config" property, reg-names must be set.
 - interrupts: A list of interrupt outputs for level interrupt,
 	pulse interrupt, special interrupt.
-- phys: From PHY binding. Phandle for the Generic PHY.
+- phys: From PHY binding. Phandle for the generic PHY.
 	Refer to Documentation/devicetree/bindings/phy/samsung-phy.txt
 
-Other common properties refer to
-	Documentation/devicetree/binding/pci/designware-pcie.txt
+For other common properties, refer to
+	Documentation/devicetree/bindings/pci/designware-pcie.txt
 
 Example:
 
-SoC specific DT Entry:
+SoC-specific DT Entry:
 
 	pcie@290000 {
 		compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
@@ -83,7 +83,7 @@
 		...
 	};
 
-Board specific DT Entry:
+Board-specific DT Entry:
 
 	pcie@290000 {
 		reset-gpio = <&pin_ctrl 5 0>;
diff --git a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt
index 49ea76d..d5a14f5 100644
--- a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt
@@ -1,12 +1,12 @@
 SPEAr13XX PCIe DT detail:
 ================================
 
-SPEAr13XX uses synopsis designware PCIe controller and ST MiPHY as phy
+SPEAr13XX uses the Synopsys DesignWare PCIe controller and ST MiPHY as PHY
 controller.
 
 Required properties:
-- compatible : should be "st,spear1340-pcie", "snps,dw-pcie".
-- phys		    : phandle to phy node associated with pcie controller
+- compatible	    : should be "st,spear1340-pcie", "snps,dw-pcie".
+- phys		    : phandle to PHY node associated with PCIe controller
 - phy-names	    : must be "pcie-phy"
 - All other definitions as per generic PCI bindings
 
diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt
index 6a07c96..7f7af30 100644
--- a/Documentation/devicetree/bindings/pci/ti-pci.txt
+++ b/Documentation/devicetree/bindings/pci/ti-pci.txt
@@ -1,6 +1,6 @@
 TI PCI Controllers
 
-PCIe Designware Controller
+PCIe DesignWare Controller
  - compatible: Should be "ti,dra7-pcie" for RC
 	       Should be "ti,dra7-pcie-ep" for EP
  - phys : list of PHY specifiers (used by generic PHY framework)
@@ -13,7 +13,7 @@
 HOST MODE
 =========
  - reg : Two register ranges as listed in the reg-names property
- - reg-names : The first entry must be "ti-conf" for the TI specific registers
+ - reg-names : The first entry must be "ti-conf" for the TI-specific registers
 	       The second entry must be "rc-dbics" for the DesignWare PCIe
 	       registers
 	       The third entry must be "config" for the PCIe configuration space
@@ -30,7 +30,7 @@
 DEVICE MODE
 ===========
  - reg : Four register ranges as listed in the reg-names property
- - reg-names : "ti-conf" for the TI specific registers
+ - reg-names : "ti-conf" for the TI-specific registers
 	       "ep_dbics" for the standard configuration registers as
 		they are locally accessed within the DIF CS space
 	       "ep_dbics2" for the standard configuration registers as
@@ -46,7 +46,7 @@
 			       access.
 
 Optional Property:
- - gpios : Should be added if a gpio line is required to drive PERST# line
+ - gpios : Should be added if a GPIO line is required to drive PERST# line
 
 NOTE: Two DT nodes may be added for each PCI controller; one for host
 mode and another for device mode. So in order for PCI to
diff --git a/Documentation/devicetree/bindings/pci/versatile.txt b/Documentation/devicetree/bindings/pci/versatile.txt
index ebd1e7d..0a702b1 100644
--- a/Documentation/devicetree/bindings/pci/versatile.txt
+++ b/Documentation/devicetree/bindings/pci/versatile.txt
@@ -5,7 +5,7 @@
 Required properties:
 - compatible: should contain "arm,versatile-pci" to identify the Versatile PCI
   controller.
-- reg: base addresses and lengths of the pci controller. There must be 3
+- reg: base addresses and lengths of the PCI controller. There must be 3
   entries:
 	- Versatile-specific registers
 	- Self Config space
diff --git a/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt b/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
index 54bac7f..85d9b95 100644
--- a/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
+++ b/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
@@ -4,7 +4,7 @@
 
 - compatible: should be "apm,xgene1-msi" to identify
 	      X-Gene v1 PCIe MSI controller block.
-- msi-controller: indicates that this is X-Gene v1 PCIe MSI controller node
+- msi-controller: indicates that this is an X-Gene v1 PCIe MSI controller node
 - reg: physical base address (0x79000000) and length (0x900000) for controller
        registers. These registers include the MSI termination address and data
        registers as well as the MSI interrupt status registers.
@@ -13,7 +13,8 @@
 	      interrupt number 0x10 to 0x1f.
 - interrupt-names: not required
 
-Each PCIe node needs to have property msi-parent that points to msi controller node
+Each PCIe node needs to have property msi-parent that points to an MSI
+controller node
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/pci/xgene-pci.txt b/Documentation/devicetree/bindings/pci/xgene-pci.txt
index 1070b06..6fd2dec 100644
--- a/Documentation/devicetree/bindings/pci/xgene-pci.txt
+++ b/Documentation/devicetree/bindings/pci/xgene-pci.txt
@@ -8,7 +8,7 @@
        property.
 - reg-names: Must include the following entries:
   "csr": controller configuration registers.
-  "cfg": pcie configuration space registers.
+  "cfg": PCIe configuration space registers.
 - #address-cells: set to <3>
 - #size-cells: set to <2>
 - ranges: ranges for the outbound memory, I/O regions.
@@ -21,11 +21,11 @@
 
 Optional properties:
 - status: Either "ok" or "disabled".
-- dma-coherent: Present if dma operations are coherent
+- dma-coherent: Present if DMA operations are coherent
 
 Example:
 
-SoC specific DT Entry:
+SoC-specific DT Entry:
 
 	pcie0: pcie@1f2b0000 {
 		status = "disabled";
@@ -51,7 +51,7 @@
 	};
 
 
-Board specific DT Entry:
+Board-specific DT Entry:
 	&pcie0 {
 		status = "ok";
 	};
diff --git a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
index 3259798..01bf7fd 100644
--- a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
@@ -15,9 +15,9 @@
 - device_type: must be "pci"
 - interrupts: Should contain NWL PCIe interrupt
 - interrupt-names: Must include the following entries:
-	"msi1, msi0": interrupt asserted when MSI is received
+	"msi1, msi0": interrupt asserted when an MSI is received
 	"intx": interrupt asserted when a legacy interrupt is received
-	"misc": interrupt asserted when miscellaneous is received
+	"misc": interrupt asserted when miscellaneous interrupt is received
 - interrupt-map-mask and interrupt-map: standard PCI properties to define the
 	mapping of the PCI interface to interrupt numbers.
 - ranges: ranges for the PCI memory regions (I/O space region is not
@@ -26,7 +26,8 @@
 	detailed explanation
 - msi-controller: indicates that this is MSI controller node
 - msi-parent:  MSI parent of the root complex itself
-- legacy-interrupt-controller: Interrupt controller device node for Legacy interrupts
+- legacy-interrupt-controller: Interrupt controller device node for Legacy
+	interrupts
 	- interrupt-controller: identifies the node as an interrupt controller
 	- #interrupt-cells: should be set to 1
 	- #address-cells: specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
index 0f6222a..b496042 100644
--- a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
@@ -3,7 +3,6 @@
 
 Required properties:
  - compatible: rockchip,rk3399-pcie-phy
- - #phy-cells: must be 0
  - clocks: Must contain an entry in clock-names.
 	See ../clocks/clock-bindings.txt for details.
  - clock-names: Must be "refclk"
@@ -11,6 +10,12 @@
 	See ../reset/reset.txt for details.
  - reset-names: Must be "phy"
 
+Required properties for legacy PHY mode (deprecated):
+ - #phy-cells: must be 0
+
+Required properties for per-lane PHY mode (preferred):
+ - #phy-cells: must be 1
+
 Example:
 
 grf: syscon@ff770000 {
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index ba2e7d2..00b6dfe 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -121,8 +121,9 @@
 Sysfs device attributes for the sensors and fan are on the
 thinkpad_hwmon device's sysfs attribute space, but you should locate it
 looking for a hwmon device with the name attribute of "thinkpad", or
-better yet, through libsensors.
-
+better yet, through libsensors. For 4.14+ sysfs attributes were moved to the
+hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or
+/sys/class/hwmon/hwmon?).
 
 Driver version
 --------------
@@ -1478,3 +1479,7 @@
 0x020700:	Support for mute-only mixers.
 		Volume control in read-only mode by default.
 		Marker for ALSA mixer support.
+
+0x030000:	Thermal and fan sysfs attributes were moved to the hwmon
+		device instead of being attached to the backing platform
+		device.
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index b2f60ca..b3ce126 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -83,6 +83,11 @@
 
     Bits for undefined preemption levels are RAZ/WI.
 
+    Note that this differs from a CPU's view of the APRs on hardware in which
+    a GIC without the security extensions expose group 0 and group 1 active
+    priorities in separate register groups, whereas we show a combined view
+    similar to GICv2's GICH_APR.
+
     For historical reasons and to provide ABI compatibility with userspace we
     export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
     field in the lower 5 bits of a word, meaning that userspace must always
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 903fc92..95ca68d 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -176,7 +176,8 @@
 
 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
 
-Allows user space to set/get the TOD clock extension (u8).
+Allows user space to set/get the TOD clock extension (u8) (superseded by
+KVM_S390_VM_TOD_EXT).
 
 Parameters: address of a buffer in user space to store the data (u8) to
 Returns:    -EFAULT if the given address is not accessible from kernel space
@@ -190,6 +191,17 @@
 Parameters: address of a buffer in user space to store the data (u64) to
 Returns:    -EFAULT if the given address is not accessible from kernel space
 
+3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
+Allows user space to set/get bits 0-63 of the TOD clock register as defined in
+the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
+also allows user space to get/set it. If the guest CPU model does not support
+it, it is stored as 0 and not allowed to be set to a value != 0.
+
+Parameters: address of a buffer in user space to store the data
+            (kvm_s390_vm_tod_clock) to
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    -EINVAL if setting the TOD clock extension to != 0 is not supported
+
 4. GROUP: KVM_S390_VM_CRYPTO
 Architectures: s390
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 109c5d9..7f32b51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7471,18 +7471,30 @@
 W:	http://www.linux-kvm.org
 T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 S:	Supported
-F:	Documentation/*/kvm*.txt
 F:	Documentation/virtual/kvm/
-F:	arch/*/kvm/
-F:	arch/x86/kernel/kvm.c
-F:	arch/x86/kernel/kvmclock.c
-F:	arch/*/include/asm/kvm*
-F:	include/linux/kvm*
+F:	include/trace/events/kvm.h
+F:	include/uapi/asm-generic/kvm*
 F:	include/uapi/linux/kvm*
-F:	virt/kvm/
+F:	include/asm-generic/kvm*
+F:	include/linux/kvm*
+F:	include/kvm/iodev.h
+F:	virt/kvm/*
 F:	tools/kvm/
 
-KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
+KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
+M:	Paolo Bonzini <pbonzini@redhat.com>
+M:	Radim KrÄmář <rkrcmar@redhat.com>
+L:	kvm@vger.kernel.org
+W:	http://www.linux-kvm.org
+T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
+S:	Supported
+F:	arch/x86/kvm/
+F:	arch/x86/include/uapi/asm/kvm*
+F:	arch/x86/include/asm/kvm*
+F:	arch/x86/kernel/kvm.c
+F:	arch/x86/kernel/kvmclock.c
+
+KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd)
 M:	Joerg Roedel <joro@8bytes.org>
 L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org/
@@ -7490,7 +7502,7 @@
 F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
 
-KERNEL VIRTUAL MACHINE (KVM) FOR ARM
+KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
 M:	Christoffer Dall <christoffer.dall@linaro.org>
 M:	Marc Zyngier <marc.zyngier@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -7504,14 +7516,16 @@
 F:	virt/kvm/arm/
 F:	include/kvm/arm_*
 
-KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
+KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
 M:	Alexander Graf <agraf@suse.com>
 L:	kvm-ppc@vger.kernel.org
 W:	http://www.linux-kvm.org/
 T:	git git://github.com/agraf/linux-2.6.git
 S:	Supported
+F:	arch/powerpc/include/uapi/asm/kvm*
 F:	arch/powerpc/include/asm/kvm*
 F:	arch/powerpc/kvm/
+F:	arch/powerpc/kernel/kvm*
 
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M:	Christoffer Dall <christoffer.dall@linaro.org>
@@ -7538,7 +7552,8 @@
 W:	http://www.ibm.com/developerworks/linux/linux390/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 S:	Supported
-F:	Documentation/s390/kvm.txt
+F:	arch/s390/include/uapi/asm/kvm*
+F:	arch/s390/include/asm/gmap.h
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
@@ -10243,6 +10258,7 @@
 
 PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
 M:	Keith Busch <keith.busch@intel.com>
+M:	Jonathan Derrick <jonathan.derrick@intel.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	drivers/pci/host/vmd.c
@@ -10289,7 +10305,7 @@
 S:	Maintained
 F:	drivers/pci/dwc/pci-exynos.c
 
-PCI DRIVER FOR SYNOPSIS DESIGNWARE
+PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:	Jingoo Han <jingoohan1@gmail.com>
 M:	Joao Pinto <Joao.Pinto@synopsys.com>
 L:	linux-pci@vger.kernel.org
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 8322df1..564114e 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -312,8 +312,9 @@ common_init_pci(void)
 {
 	struct pci_controller *hose;
 	struct list_head resources;
+	struct pci_host_bridge *bridge;
 	struct pci_bus *bus;
-	int next_busno;
+	int ret, next_busno;
 	int need_domain_info = 0;
 	u32 pci_mem_end;
 	u32 sg_base;
@@ -336,11 +337,25 @@ common_init_pci(void)
 		pci_add_resource_offset(&resources, hose->mem_space,
 					hose->mem_space->start);
 
-		bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops,
-					hose, &resources);
-		if (!bus)
+		bridge = pci_alloc_host_bridge(0);
+		if (!bridge)
 			continue;
-		hose->bus = bus;
+
+		list_splice_init(&resources, &bridge->windows);
+		bridge->dev.parent = NULL;
+		bridge->sysdata = hose;
+		bridge->busnr = next_busno;
+		bridge->ops = alpha_mv.pci_ops;
+		bridge->swizzle_irq = alpha_mv.pci_swizzle;
+		bridge->map_irq = alpha_mv.pci_map_irq;
+
+		ret = pci_scan_root_bus_bridge(bridge);
+		if (ret) {
+			pci_free_host_bridge(bridge);
+			continue;
+		}
+
+		bus = hose->bus = bridge->bus;
 		hose->need_domain_info = need_domain_info;
 		next_busno = bus->busn_res.end + 1;
 		/* Don't allow 8-bit bus number overflow inside the hose -
@@ -354,7 +369,6 @@ common_init_pci(void)
 	pcibios_claim_console_setup();
 
 	pci_assign_unassigned_resources();
-	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
 	for (hose = hose_head; hose; hose = hose->next) {
 		bus = hose->bus;
 		if (bus)
@@ -362,7 +376,6 @@ common_init_pci(void)
 	}
 }
 
-
 struct pci_controller * __init
 alloc_pci_controller(void)
 {
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 2cfaa0e..8ae04a1 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -194,22 +194,46 @@ static struct resource irongate_mem = {
 	.name	= "Irongate PCI MEM",
 	.flags	= IORESOURCE_MEM,
 };
+static struct resource busn_resource = {
+	.name	= "PCI busn",
+	.start	= 0,
+	.end	= 255,
+	.flags	= IORESOURCE_BUS,
+};
 
 void __init
 nautilus_init_pci(void)
 {
 	struct pci_controller *hose = hose_head;
+	struct pci_host_bridge *bridge;
 	struct pci_bus *bus;
 	struct pci_dev *irongate;
 	unsigned long bus_align, bus_size, pci_mem;
 	unsigned long memtop = max_low_pfn << PAGE_SHIFT;
+	int ret;
 
-	/* Scan our single hose.  */
-	bus = pci_scan_bus(0, alpha_mv.pci_ops, hose);
-	if (!bus)
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
 		return;
 
-	hose->bus = bus;
+	pci_add_resource(&bridge->windows, &ioport_resource);
+	pci_add_resource(&bridge->windows, &iomem_resource);
+	pci_add_resource(&bridge->windows, &busn_resource);
+	bridge->dev.parent = NULL;
+	bridge->sysdata = hose;
+	bridge->busnr = 0;
+	bridge->ops = alpha_mv.pci_ops;
+	bridge->swizzle_irq = alpha_mv.pci_swizzle;
+	bridge->map_irq = alpha_mv.pci_map_irq;
+
+	/* Scan our single hose.  */
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return;
+	}
+
+	bus = hose->bus = bridge->bus;
 	pcibios_claim_one_bus(bus);
 
 	irongate = pci_get_bus_and_slot(0, 0);
@@ -254,7 +278,6 @@ nautilus_init_pci(void)
 	/* pci_common_swizzle() relies on bus->self being NULL
 	   for the root bus, so just clear it. */
 	bus->self = NULL;
-	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
 	pci_bus_add_devices(bus);
 }
 
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 7db85ab..a598641 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -100,6 +100,7 @@
 source "arch/arc/plat-axs10x/Kconfig"
 #New platform adds here
 source "arch/arc/plat-eznps/Kconfig"
+source "arch/arc/plat-hsdk/Kconfig"
 
 endmenu
 
@@ -418,7 +419,7 @@
 endmenu   # "ARC CPU Configuration"
 
 config LINUX_LINK_BASE
-	hex "Linux Link Address"
+	hex "Kernel link address"
 	default "0x80000000"
 	help
 	  ARC700 divides the 32 bit phy address space into two equal halves
@@ -431,6 +432,14 @@
 	  If you don't know what the above means, leave this setting alone.
 	  This needs to match memory start address specified in Device Tree
 
+config LINUX_RAM_BASE
+	hex "RAM base address"
+	default LINUX_LINK_BASE
+	help
+	  By default Linux is linked at base of RAM. However in some special
+	  cases (such as HSDK), Linux can't be linked at start of DDR, hence
+	  this option.
+
 config HIGHMEM
 	bool "High Memory Support"
 	select ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 3a61cfc..3a4b52b 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -111,6 +111,7 @@
 core-$(CONFIG_ARC_PLAT_TB10X)	+= arch/arc/plat-tb10x/
 core-$(CONFIG_ARC_PLAT_AXS10X)	+= arch/arc/plat-axs10x/
 core-$(CONFIG_ARC_PLAT_EZNPS)	+= arch/arc/plat-eznps/
+core-$(CONFIG_ARC_SOC_HSDK)	+= arch/arc/plat-hsdk/
 
 ifdef CONFIG_ARC_PLAT_EZNPS
 KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index a380ffa..fdc2665 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -99,7 +99,7 @@
 
 	memory {
 		device_type = "memory";
-		/* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */
+		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
 		reg = <0x0 0x80000000 0x0 0x1b000000>;	/* (512 - 32) MiB */
 	};
 
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index cc9239e..4e6e9f5 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -24,10 +24,17 @@
 
 		ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
 
-		core_clk: core_clk {
+		input_clk: input-clk {
 			#clock-cells = <0>;
 			compatible = "fixed-clock";
-			clock-frequency = <90000000>;
+			clock-frequency = <33333333>;
+		};
+
+		core_clk: core-clk@80 {
+			compatible = "snps,axs10x-arc-pll-clock";
+			reg = <0x80 0x10>, <0x100 0x10>;
+			#clock-cells = <0>;
+			clocks = <&input_clk>;
 		};
 
 		core_intc: archs-intc@cpu {
@@ -102,7 +109,7 @@
 
 	memory {
 		device_type = "memory";
-		/* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */
+		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
 		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MiB low mem */
 		       0x1 0xc0000000 0x0 0x40000000>;	/* 1 GiB highmem */
 	};
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 4ebb2170..63954a8 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -24,10 +24,17 @@
 
 		ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
 
-		core_clk: core_clk {
+		input_clk: input-clk {
 			#clock-cells = <0>;
 			compatible = "fixed-clock";
-			clock-frequency = <100000000>;
+			clock-frequency = <33333333>;
+		};
+
+		core_clk: core-clk@80 {
+			compatible = "snps,axs10x-arc-pll-clock";
+			reg = <0x80 0x10>, <0x100 0x10>;
+			#clock-cells = <0>;
+			clocks = <&input_clk>;
 		};
 
 		core_intc: archs-intc@cpu {
@@ -108,7 +115,7 @@
 
 	memory {
 		device_type = "memory";
-		/* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */
+		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
 		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MiB low mem */
 		       0x1 0xc0000000 0x0 0x40000000>;	/* 1 GiB highmem */
 	};
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
new file mode 100644
index 0000000..229d13a
--- /dev/null
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for ARC HS Development Kit
+ */
+/dts-v1/;
+
+#include <dt-bindings/net/ti-dp83867.h>
+
+/ {
+	model = "snps,hsdk";
+	compatible = "snps,hsdk";
+
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "snps,archs38";
+			reg = <0>;
+			clocks = <&core_clk>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "snps,archs38";
+			reg = <1>;
+			clocks = <&core_clk>;
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "snps,archs38";
+			reg = <2>;
+			clocks = <&core_clk>;
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "snps,archs38";
+			reg = <3>;
+			clocks = <&core_clk>;
+		};
+	};
+
+	core_clk: core-clk {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <500000000>;
+	};
+
+	cpu_intc: cpu-interrupt-controller {
+		compatible = "snps,archs-intc";
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	idu_intc: idu-interrupt-controller {
+		compatible = "snps,archs-idu-intc";
+		interrupt-controller;
+		#interrupt-cells = <1>;
+		interrupt-parent = <&cpu_intc>;
+	};
+
+	arcpct: pct {
+		compatible = "snps,archs-pct";
+	};
+
+	/* TIMER0 with interrupt for clockevent */
+	timer {
+		compatible = "snps,arc-timer";
+		interrupts = <16>;
+		interrupt-parent = <&cpu_intc>;
+		clocks = <&core_clk>;
+	};
+
+	/* 64-bit Global Free Running Counter */
+	gfrc {
+		compatible = "snps,archs-timer-gfrc";
+		clocks = <&core_clk>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupt-parent = <&idu_intc>;
+
+		ranges = <0x00000000 0xf0000000 0x10000000>;
+
+		serial: serial@5000 {
+			compatible = "snps,dw-apb-uart";
+			reg = <0x5000 0x100>;
+			clock-frequency = <33330000>;
+			interrupts = <6>;
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+		};
+
+		gmacclk: gmacclk {
+			compatible = "fixed-clock";
+			clock-frequency = <400000000>;
+			#clock-cells = <0>;
+		};
+
+		mmcclk_ciu: mmcclk-ciu {
+			compatible = "fixed-clock";
+			clock-frequency = <100000000>;
+			#clock-cells = <0>;
+		};
+
+		mmcclk_biu: mmcclk-biu {
+			compatible = "fixed-clock";
+			clock-frequency = <400000000>;
+			#clock-cells = <0>;
+		};
+
+		ethernet@8000 {
+			#interrupt-cells = <1>;
+			compatible = "snps,dwmac";
+			reg = <0x8000 0x2000>;
+			interrupts = <10>;
+			interrupt-names = "macirq";
+			phy-mode = "rgmii";
+			snps,pbl = <32>;
+			clocks = <&gmacclk>;
+			clock-names = "stmmaceth";
+			phy-handle = <&phy0>;
+
+			mdio {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "snps,dwmac-mdio";
+				phy0: ethernet-phy@0 {
+					reg = <0>;
+					ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
+					ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
+					ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+				};
+			};
+		};
+
+		ohci@60000 {
+			compatible = "snps,hsdk-v1.0-ohci", "generic-ohci";
+			reg = <0x60000 0x100>;
+			interrupts = <15>;
+		};
+
+		ehci@40000 {
+			compatible = "snps,hsdk-v1.0-ehci", "generic-ehci";
+			reg = <0x40000 0x100>;
+			interrupts = <15>;
+		};
+
+		mmc@a000 {
+			compatible = "altr,socfpga-dw-mshc";
+			reg = <0xa000 0x400>;
+			num-slots = <1>;
+			fifo-depth = <16>;
+			card-detect-delay = <200>;
+			clocks = <&mmcclk_biu>, <&mmcclk_ciu>;
+			clock-names = "biu", "ciu";
+			interrupts = <12>;
+			bus-width = <4>;
+		};
+	};
+
+	memory@80000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "memory";
+		reg = <0x80000000 0x40000000>;  /* 1 GiB */
+	};
+};
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index 3772c40..8d787b2 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -18,7 +18,7 @@
 
 	memory {
 		device_type = "memory";
-		/* CONFIG_LINUX_LINK_BASE needs to match low mem start */
+		/* CONFIG_LINUX_RAM_BASE needs to match low mem start */
 		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MB low mem */
 		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
 	};
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
new file mode 100644
index 0000000..9a3fcf4
--- /dev/null
+++ b/arch/arc/configs/hsdk_defconfig
@@ -0,0 +1,80 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_ARC_SOC_HSDK=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_LINUX_LINK_BASE=0x90000000
+CONFIG_LINUX_RAM_BASE=0x80000000
+CONFIG_ARC_BUILTIN_DTB_NAME="hsdk"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_STMMAC_ETH=y
+CONFIG_MICREL_PHY=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FB_UDL=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
+CONFIG_CRYPTO_ECHAINIV=y
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 02fd1ce..8486f32 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -47,7 +47,8 @@
 	: "r"(data), "r"(ptr));		\
 })
 
-#define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
+/* Largest line length for either L1 or L2 is 128 bytes */
+#define ARCH_DMA_MINALIGN      128
 
 extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
@@ -95,6 +96,8 @@ extern unsigned long perip_base, perip_end;
 #define ARC_REG_SLC_CTRL	0x903
 #define ARC_REG_SLC_FLUSH	0x904
 #define ARC_REG_SLC_INVALIDATE	0x905
+#define ARC_AUX_SLC_IVDL	0x910
+#define ARC_AUX_SLC_FLDL	0x912
 #define ARC_REG_SLC_RGN_START	0x914
 #define ARC_REG_SLC_RGN_START1	0x915
 #define ARC_REG_SLC_RGN_END	0x916
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 14c310f..ec36d5b 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -192,6 +192,12 @@
 	PUSHAX	lp_start
 	PUSHAX	erbta
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	.word CTOP_INST_SCHD_RW
+	PUSHAX  CTOP_AUX_GPA1
+	PUSHAX  CTOP_AUX_EFLAGS
+#endif
+
 	lr	r9, [ecr]
 	st      r9, [sp, PT_event]    /* EV_Trap expects r9 to have ECR */
 .endm
@@ -208,6 +214,12 @@
  * by hardware and that is not good.
  *-------------------------------------------------------------*/
 .macro EXCEPTION_EPILOGUE
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	.word CTOP_INST_SCHD_RW
+	POPAX   CTOP_AUX_EFLAGS
+	POPAX   CTOP_AUX_GPA1
+#endif
+
 	POPAX	erbta
 	POPAX	lp_start
 	POPAX	lp_end
@@ -265,6 +277,12 @@
 	PUSHAX	lp_end
 	PUSHAX	lp_start
 	PUSHAX	bta_l\LVL\()
+
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	.word CTOP_INST_SCHD_RW
+	PUSHAX  CTOP_AUX_GPA1
+	PUSHAX  CTOP_AUX_EFLAGS
+#endif
 .endm
 
 /*--------------------------------------------------------------
@@ -277,6 +295,12 @@
  * by hardware and that is not good.
  *-------------------------------------------------------------*/
 .macro INTERRUPT_EPILOGUE  LVL
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	.word CTOP_INST_SCHD_RW
+	POPAX   CTOP_AUX_EFLAGS
+	POPAX   CTOP_AUX_GPA1
+#endif
+
 	POPAX	bta_l\LVL\()
 	POPAX	lp_start
 	POPAX	lp_end
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index a64c447..8a4f77e 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -47,9 +47,6 @@
 #define ISA_INIT_STATUS_BITS	(STATUS_IE_MASK | STATUS_AD_MASK | \
 					(ARCV2_IRQ_DEF_PRIO << 1))
 
-/* SLEEP needs default irq priority (<=) which can interrupt the doze */
-#define ISA_SLEEP_ARG		(0x10 | ARCV2_IRQ_DEF_PRIO)
-
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 4c6eed8..fcb8017 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -43,8 +43,6 @@
 
 #define ISA_INIT_STATUS_BITS	STATUS_IE_MASK
 
-#define ISA_SLEEP_ARG		0x3
-
 #ifndef __ASSEMBLY__
 
 /******************************************************************
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 296c342..109baa0 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -85,7 +85,7 @@ typedef pte_t * pgtable_t;
  */
 #define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
 
-#define ARCH_PFN_OFFSET		virt_to_pfn(CONFIG_LINUX_LINK_BASE)
+#define ARCH_PFN_OFFSET		virt_to_pfn(CONFIG_LINUX_RAM_BASE)
 
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		(((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 4104a08..d400a21 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -27,6 +27,13 @@ struct arc_fpu {
 };
 #endif
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+struct eznps_dp {
+	unsigned int eflags;
+	unsigned int gpa1;
+};
+#endif
+
 /* Arch specific stuff which needs to be saved per task.
  * However these items are not so important so as to earn a place in
  * struct thread_info
@@ -38,6 +45,9 @@ struct thread_struct {
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 	struct arc_fpu fpu;
 #endif
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	struct eznps_dp dp;
+#endif
 };
 
 #define INIT_THREAD  {                          \
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 5297faa..5a8cb22 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -19,6 +19,11 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 struct pt_regs {
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+	unsigned long eflags;	/* Extended FLAGS */
+	unsigned long gpa1;	/* General Purpose Aux */
+#endif
+
 	/* Real registers */
 	unsigned long bta;	/* bta_l1, bta_l2, erbta */
 
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index a325e6a..47efc84 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -247,9 +247,15 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 	__asm__ __volatile__(
 	"1:	ex  %0, [%1]		\n"
+#ifdef CONFIG_EZNPS_MTM_EXT
+	"	.word %3		\n"
+#endif
 	"	breq  %0, %2, 1b	\n"
 	: "+&r" (val)
 	: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
+#ifdef CONFIG_EZNPS_MTM_EXT
+	, "i"(CTOP_INST_SCHD_RW)
+#endif
 	: "memory");
 
 	/*
@@ -291,6 +297,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	 */
 	smp_mb();
 
+	/*
+	 * EX is not really required here, a simple STore of 0 suffices.
+	 * However this causes tasklist livelocks in SystemC based SMP virtual
+	 * platforms where the systemc core scheduler uses EX as a cue for
+	 * moving to next core. Do a git log of this file for details
+	 */
 	__asm__ __volatile__(
 	"	ex  %0, [%1]		\n"
 	: "+r" (val)
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 1b171ab..f7d07fe 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -26,10 +26,19 @@ extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
 
 #endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
+#define ARC_EZNPS_DP_PREV(p, n)      dp_save_restore(p, n)
+#else
+#define ARC_EZNPS_DP_PREV(p, n)
+
+#endif /* !CONFIG_ARC_PLAT_EZNPS */
+
 struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
 
 #define switch_to(prev, next, last)	\
 do {					\
+	ARC_EZNPS_DP_PREV(prev, next);	\
 	ARC_FPU_PREV(prev, next);	\
 	last = __switch_to(prev, next);\
 	ARC_FPU_NEXT(next);		\
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8942c5c..2dc5f42 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -12,7 +12,6 @@
 obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
 obj-$(CONFIG_ISA_ARCOMPACT)		+= entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)			+= entry-arcv2.o intc-arcv2.o
-obj-$(CONFIG_PCI)  			+= pcibios.o
 
 obj-$(CONFIG_MODULES)			+= arcksyms.o module.o
 obj-$(CONFIG_SMP) 			+= smp.o
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 3b67f53..521ef35 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -29,8 +29,9 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
 {
 	if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
 		arc_base_baud = 166666666;	/* Fixed 166.6MHz clk (TB10x) */
-	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
-		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x) */
+	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
+		 of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
+		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x & HSDK) */
 	else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
 		arc_base_baud = 800000000;      /* Fixed 800MHz clk (NPS) */
 	else
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 9211707..f285dbb 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -25,12 +25,12 @@
  *
  * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
  *  -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
- *  -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't
+ *  -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
  *   need ptregs anymore
  *
  * Vineetg: Oct 2009
  *  -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- *   out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains
+ *   out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
  *   active (AE bit enabled).  This causes a double fault for a subseq valid
  *   exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
  *   Instr Error could also cause similar scenario, so same there as well.
@@ -59,7 +59,7 @@
  */
 
 #include <linux/errno.h>
-#include <linux/linkage.h>	/* {EXTRY,EXIT} */
+#include <linux/linkage.h>	/* {ENTRY,EXIT} */
 #include <asm/entry.h>
 #include <asm/irqflags.h>
 
@@ -80,8 +80,8 @@
 	.align 4
 
 /* Each entry in the vector table must occupy 2 words. Since it is a jump
- * across sections (.vector to .text) we are gauranteed that 'j somewhere'
- * will use the 'j limm' form of the intrsuction as long as somewhere is in
+ * across sections (.vector to .text) we are guaranteed that 'j somewhere'
+ * will use the 'j limm' form of the instruction as long as somewhere is in
  * a section other than .vector.
  */
 
@@ -105,13 +105,13 @@
 
 ; ******************** Exceptions **********************
 VECTOR   EV_MachineCheck         ; 0x100, Fatal Machine check   (0x20)
-VECTOR   EV_TLBMissI             ; 0x108, Intruction TLB miss   (0x21)
+VECTOR   EV_TLBMissI             ; 0x108, Instruction TLB miss  (0x21)
 VECTOR   EV_TLBMissD             ; 0x110, Data TLB miss         (0x22)
 VECTOR   EV_TLBProtV             ; 0x118, Protection Violation  (0x23)
 				 ;         or Misaligned Access
 VECTOR   EV_PrivilegeV           ; 0x120, Privilege Violation   (0x24)
 VECTOR   EV_Trap                 ; 0x128, Trap exception        (0x25)
-VECTOR   EV_Extension            ; 0x130, Extn Intruction Excp  (0x26)
+VECTOR   EV_Extension            ; 0x130, Extn Instruction Excp (0x26)
 
 .rept   24
 VECTOR   reserved                ; Reserved Exceptions
@@ -199,7 +199,7 @@
 
 ; ---------------------------------------------
 ; User Mode Memory Bus Error Interrupt Handler
-; (Kernel mode memory errors handled via seperate exception vectors)
+; (Kernel mode memory errors handled via separate exception vectors)
 ; ---------------------------------------------
 ENTRY(mem_service)
 
@@ -273,7 +273,7 @@
 	;------ (5) Type of Protection Violation? ----------
 	;
 	; ProtV Hardware Exception is triggered for Access Faults of 2 types
-	;   -Access Violaton	: 00_23_(00|01|02|03)_00
+	;   -Access Violation	: 00_23_(00|01|02|03)_00
 	;			         x  r  w  r+w
 	;   -Unaligned Access	: 00_23_04_00
 	;
@@ -327,7 +327,7 @@
 
 .Lrestore_regs:
 
-	# Interrpts are actually disabled from this point on, but will get
+	# Interrupts are actually disabled from this point on, but will get
 	# reenabled after we return from interrupt/exception.
 	# But irq tracer needs to be told now...
 	TRACE_ASM_IRQ_ENABLE
@@ -335,7 +335,7 @@
 	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
-	; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+	; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None
 	; Note that we use realtime STATUS32 (not pt_regs->status32) to
 	; decide that.
 
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1eea99b..85d9ea4 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -92,6 +92,12 @@
 	lr  r0, [efa]
 	mov r1, sp
 
+	; hardware auto-disables MMU, re-enable it to allow kernel vaddr
+	; access for say stack unwinding of modules for crash dumps
+	lr	r3, [ARC_REG_PID]
+	or	r3, r3, MMU_ENABLE
+	sr	r3, [ARC_REG_PID]
+
 	lsr  	r3, r2, 8
 	bmsk 	r3, r3, 7
 	brne    r3, ECR_C_MCHK_DUP_TLB, 1f
diff --git a/arch/arc/kernel/pcibios.c b/arch/arc/kernel/pcibios.c
deleted file mode 100644
index 72e1d73..0000000
--- a/arch/arc/kernel/pcibios.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/pci.h>
-
-/*
- * We don't have to worry about legacy ISA devices, so nothing to do here
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 2a018de..5ac3b54 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -79,15 +79,40 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 	return uval;
 }
 
+#ifdef CONFIG_ISA_ARCV2
+
 void arch_cpu_idle(void)
 {
-	/* sleep, but enable all interrupts before committing */
+	/* Re-enable interrupts <= default irq priority before commiting SLEEP */
+	const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
+
 	__asm__ __volatile__(
 		"sleep %0	\n"
 		:
-		:"I"(ISA_SLEEP_ARG)); /* can't be "r" has to be embedded const */
+		:"I"(arg)); /* can't be "r" has to be embedded const */
 }
 
+#elif defined(CONFIG_EZNPS_MTM_EXT)	/* ARC700 variant in NPS */
+
+void arch_cpu_idle(void)
+{
+	/* only the calling HW thread needs to sleep */
+	__asm__ __volatile__(
+		".word %0	\n"
+		:
+		:"i"(CTOP_INST_HWSCHD_WFT_IE12));
+}
+
+#else	/* ARC700 */
+
+void arch_cpu_idle(void)
+{
+	/* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+	__asm__ __volatile__("sleep 0x3	\n");
+}
+
+#endif
+
 asmlinkage void ret_from_fork(void);
 
 /*
@@ -209,6 +234,10 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
 	 */
 	regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
 
+#ifdef CONFIG_EZNPS_MTM_EXT
+	regs->eflags = 0;
+#endif
+
 	/* bogus seed values for debugging */
 	regs->lp_start = 0x10;
 	regs->lp_end = 0x80;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 666613f..c4ffb44 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -385,13 +385,13 @@ void setup_processor(void)
 	read_arc_build_cfg_regs();
 	arc_init_IRQ();
 
-	printk(arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+	pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
 
 	arc_mmu_init();
 	arc_cache_init();
 
-	printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-	printk(arc_platform_smp_cpuinfo());
+	pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
+	pr_info("%s", arc_platform_smp_cpuinfo());
 
 	arc_chk_core_config();
 }
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index ff83e78..bcd7c9f 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -80,7 +80,7 @@ int name(unsigned long address, struct pt_regs *regs) \
 DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC)
 DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC)
 DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
-DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
+DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 
@@ -103,7 +103,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs,
  */
 void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
 {
-	die("Machine Check Exception", regs, address);
+	die("Unhandled Machine Check Exception", regs, address);
 }
 
 
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index f9caf79..7e94476 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -140,7 +140,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
 	} else if (vec == ECR_V_ITLB_MISS) {
 		pr_cont("Insn could not be fetched\n");
 	} else if (vec == ECR_V_MACH_CHK) {
-		pr_cont("%s\n", (cause_code == 0x0) ?
+		pr_cont("Machine Check (%s)\n", (cause_code == 0x0) ?
 					"Double Fault" : "Other Fatal Err");
 
 	} else if (vec == ECR_V_PROTV) {
@@ -233,6 +233,9 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
 {
 	current->thread.fault_address = address;
 
+	/* Show fault description */
+	pr_info("\n%s\n", str);
+
 	/* Caller and Callee regs */
 	show_regs(regs);
 
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 7db283b..eee924d 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -652,7 +652,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
 
-noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
+noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
 	/*
@@ -715,6 +715,58 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
 #endif
 }
 
+noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+	/*
+	 * SLC is shared between all cores and concurrent aux operations from
+	 * multiple cores need to be serialized using a spinlock
+	 * A concurrent operation can be silently ignored and/or the old/new
+	 * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+	 * below)
+	 */
+	static DEFINE_SPINLOCK(lock);
+
+	const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1);
+	unsigned int ctrl, cmd;
+	unsigned long flags;
+	int num_lines;
+
+	spin_lock_irqsave(&lock, flags);
+
+	ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+	/* Don't rely on default value of IM bit */
+	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
+		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
+	else
+		ctrl |= SLC_CTRL_IM;
+
+	write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+	cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL;
+
+	sz += paddr & ~SLC_LINE_MASK;
+	paddr &= SLC_LINE_MASK;
+
+	num_lines = DIV_ROUND_UP(sz, l2_line_sz);
+
+	while (num_lines-- > 0) {
+		write_aux_reg(cmd, paddr);
+		paddr += l2_line_sz;
+	}
+
+	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
+	read_aux_reg(ARC_REG_SLC_CTRL);
+
+	while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+	spin_unlock_irqrestore(&lock, flags);
+#endif
+}
+
+#define slc_op(paddr, sz, op)	slc_op_rgn(paddr, sz, op)
+
 noinline static void slc_entire_op(const int op)
 {
 	unsigned int ctrl, r = ARC_REG_SLC_CTRL;
@@ -1095,7 +1147,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
  */
 noinline void __init arc_ioc_setup(void)
 {
-	unsigned int ap_sz;
+	unsigned int ioc_base, mem_sz;
 
 	/* Flush + invalidate + disable L1 dcache */
 	__dc_disable();
@@ -1104,18 +1156,29 @@ noinline void __init arc_ioc_setup(void)
 	if (read_aux_reg(ARC_REG_SLC_BCR))
 		slc_entire_op(OP_FLUSH_N_INV);
 
-	/* IOC Aperture start: TDB: handle non default CONFIG_LINUX_LINK_BASE */
-	write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
-
 	/*
-	 * IOC Aperture size:
-	 *   decoded as 2 ^ (SIZE + 2) KB: so setting 0x11 implies 512M
+	 * currently IOC Aperture covers entire DDR
 	 * TBD: fix for PGU + 1GB of low mem
 	 * TBD: fix for PAE
 	 */
-	ap_sz = order_base_2(arc_get_mem_sz()/1024) - 2;
-	write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, ap_sz);
+	mem_sz = arc_get_mem_sz();
 
+	if (!is_power_of_2(mem_sz) || mem_sz < 4096)
+		panic("IOC Aperture size must be power of 2 larger than 4KB");
+
+	/*
+	 * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
+	 * so setting 0x11 implies 512MB, 0x12 implies 1GB...
+	 */
+	write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2);
+
+	/* for now assume kernel base is start of IOC aperture */
+	ioc_base = CONFIG_LINUX_RAM_BASE;
+
+	if (ioc_base % mem_sz != 0)
+		panic("IOC Aperture start must be aligned to the size of the aperture");
+
+	write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12);
 	write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
 	write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
 
@@ -1207,7 +1270,7 @@ void __ref arc_cache_init(void)
 	unsigned int __maybe_unused cpu = smp_processor_id();
 	char str[256];
 
-	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+	pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
 
 	if (!cpu)
 		arc_cache_init_master();
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 162c975..a0b7bd6 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -207,7 +207,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
 	/* Are we prepared to handle this kernel fault?
 	 *
 	 * (The kernel has valid exception-points in the source
-	 *  when it acesses user-memory. When it fails in one
+	 *  when it accesses user-memory. When it fails in one
 	 *  of those points, we find it in a table and do a jump
 	 *  to some fixup code that loads an appropriate error
 	 *  code)
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 8c9415e..ba14506 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -26,7 +26,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
 char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
 EXPORT_SYMBOL(empty_zero_page);
 
-static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE;
+static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE;
 static unsigned long low_mem_sz;
 
 #ifdef CONFIG_HIGHMEM
@@ -63,7 +63,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 	if (!low_mem_sz) {
 		if (base != low_mem_start)
-			panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+			panic("CONFIG_LINUX_RAM_BASE != DT memory { }");
 
 		low_mem_sz = size;
 		in_use = 1;
@@ -161,7 +161,7 @@ void __init setup_arch_memory(void)
 	 * We can't use the helper free_area_init(zones[]) because it uses
 	 * PAGE_OFFSET to compute the @min_low_pfn which would be wrong
 	 * when our kernel doesn't start at PAGE_OFFSET, i.e.
-	 * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE
+	 * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE
 	 */
 	free_area_init_node(0,			/* node-id */
 			    zones_size,		/* num pages per zone */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index b181f3e..8ceefbf 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -821,7 +821,7 @@ void arc_mmu_init(void)
 	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
 
-	printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
+	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
 	/*
 	 * Can't be done in processor.h due to header include depenedencies
@@ -908,9 +908,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
 	local_irq_save(flags);
 
-	/* re-enable the MMU */
-	write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
-
 	/* loop thru all sets of TLB */
 	for (set = 0; set < mmu->sets; set++) {
 
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index b30e4e3..0e1e47a 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -274,6 +274,13 @@
 .macro COMMIT_ENTRY_TO_MMU
 #if (CONFIG_ARC_MMU_VER < 4)
 
+#ifdef CONFIG_EZNPS_MTM_EXT
+	/* verify if entry for this vaddr+ASID already exists */
+	sr    TLBProbe, [ARC_REG_TLBCOMMAND]
+	lr    r0, [ARC_REG_TLBINDEX]
+	bbit0 r0, 31, 88f
+#endif
+
 	/* Get free TLB slot: Set = computed from vaddr, way = random */
 	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
 
@@ -287,6 +294,8 @@
 #else
 	sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
 #endif
+
+88:
 .endm
 
 
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 38ff349..f1ac679 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -80,22 +80,6 @@ static void __init axs10x_enable_gpio_intc_wire(void)
 	iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN);
 }
 
-static inline void __init
-write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg)
-{
-	unsigned int loops = 128 * 1024, ctr;
-
-	iowrite32(value, reg);
-
-	ctr = loops;
-	while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */
-		cpu_relax();
-
-	ctr = loops;
-	while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */
-		cpu_relax();
-}
-
 static void __init axs10x_print_board_ver(unsigned int creg, const char *str)
 {
 	union ver {
@@ -314,7 +298,6 @@ static void __init axs101_early_init(void)
 
 #ifdef CONFIG_AXS103
 
-#define AXC003_CGU	0xF0000000
 #define AXC003_CREG	0xF0001000
 #define AXC003_MST_AXI_TUNNEL	0
 #define AXC003_MST_HS38		1
@@ -324,131 +307,38 @@ static void __init axs101_early_init(void)
 #define CREG_CPU_TUN_IO_CTRL	(AXC003_CREG + 0x494)
 
 
-union pll_reg {
-	struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6;
-#else
-		unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17;
-#endif
-	};
-	unsigned int val;
-};
-
-static unsigned int __init axs103_get_freq(void)
-{
-	union pll_reg idiv, fbdiv, odiv;
-	unsigned int f = 33333333;
-
-	idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0);
-	fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4);
-	odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8);
-
-	if (idiv.bypass != 1)
-		f = f / (idiv.low + idiv.high);
-
-	if (fbdiv.bypass != 1)
-		f = f * (fbdiv.low + fbdiv.high);
-
-	if (odiv.bypass != 1)
-		f = f / (odiv.low + odiv.high);
-
-	f = (f + 500000) / 1000000; /* Rounding */
-	return f;
-}
-
-static inline unsigned int __init encode_div(unsigned int id, int upd)
-{
-	union pll_reg div;
-
-	div.val = 0;
-
-	div.noupd = !upd;
-	div.bypass = id == 1 ? 1 : 0;
-	div.edge = (id%2 == 0) ? 0 : 1;  /* 0 = rising */
-	div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1;
-	div.high = id >> 1;
-
-	return div.val;
-}
-
-noinline static void __init
-axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
-{
-	write_cgu_reg(encode_div(id, 0),
-		      (void __iomem *)AXC003_CGU + 0x80 + 0,
-		      (void __iomem *)AXC003_CGU + 0x110);
-
-	write_cgu_reg(encode_div(fd, 0),
-		      (void __iomem *)AXC003_CGU + 0x80 + 4,
-		      (void __iomem *)AXC003_CGU + 0x110);
-
-	write_cgu_reg(encode_div(od, 1),
-		      (void __iomem *)AXC003_CGU + 0x80 + 8,
-		      (void __iomem *)AXC003_CGU + 0x110);
-}
-
 static void __init axs103_early_init(void)
 {
-	int offset = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
-	const struct fdt_property *prop = fdt_get_property(initial_boot_params,
-							   offset,
-							   "clock-frequency",
-							   NULL);
-	u32 freq = be32_to_cpu(*(u32*)(prop->data)) / 1000000, orig = freq;
-
+#ifdef CONFIG_ARC_MCIP
 	/*
 	 * AXS103 configurations for SMP/QUAD configurations share device tree
-	 * which defaults to 90 MHz. However recent failures of Quad config
+	 * which defaults to 100 MHz. However recent failures of Quad config
 	 * revealed P&R timing violations so clamp it down to safe 50 MHz
 	 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
-	 *
-	 * This hack is really hacky as of now. Fix it properly by getting the
-	 * number of cores as return value of platform's early SMP callback
+	 * of fudging the freq in DT
 	 */
-#ifdef CONFIG_ARC_MCIP
 	unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
-	if (num_cores > 2)
-		freq = 50;
-#endif
-
-	switch (freq) {
-	case 33:
-		axs103_set_freq(1, 1, 1);
-		break;
-	case 50:
-		axs103_set_freq(1, 30, 20);
-		break;
-	case 75:
-		axs103_set_freq(2, 45, 10);
-		break;
-	case 90:
-		axs103_set_freq(2, 54, 10);
-		break;
-	case 100:
-		axs103_set_freq(1, 30, 10);
-		break;
-	case 125:
-		axs103_set_freq(2, 45,  6);
-		break;
-	default:
+	if (num_cores > 2) {
+		u32 freq = 50, orig;
 		/*
-		 * In this case, core_frequency derived from
-		 * DT "clock-frequency" might not match with board value.
-		 * Hence update it to match the board value.
+		 * TODO: use cpu node "cpu-freq" param instead of platform-specific
+		 * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
 		 */
-		freq = axs103_get_freq();
-		break;
-	}
+		int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
+		const struct fdt_property *prop;
 
-	pr_info("Freq is %dMHz\n", freq);
+		prop = fdt_get_property(initial_boot_params, off,
+					"clock-frequency", NULL);
+		orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
 
-	/* Patching .dtb in-place with new core clock value */
-	if (freq != orig ) {
-		freq = cpu_to_be32(freq * 1000000);
-		fdt_setprop_inplace(initial_boot_params, offset,
-				    "clock-frequency", &freq, sizeof(freq));
+		/* Patching .dtb in-place with new core clock value */
+		if (freq != orig ) {
+			freq = cpu_to_be32(freq * 1000000);
+			fdt_setprop_inplace(initial_boot_params, off,
+					    "clock-frequency", &freq, sizeof(freq));
+		}
 	}
+#endif
 
 	/* Memory maps already config in pre-bootloader */
 
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index 1595a38..e151e20 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -12,8 +12,8 @@
 	help
 	  Support for EZchip development platforms,
 	  based on ARC700 cores.
-	  We handle few flavours:
-	    - Hardware Emulator AKA HE which is FPGA based chasis
+	  We handle few flavors:
+	    - Hardware Emulator AKA HE which is FPGA based chassis
 	    - Simulator based on MetaWare nSIM
 	    - NPS400 chip based on ASIC
 
@@ -32,3 +32,25 @@
 	  any of them seem like CPU from Linux point of view.
 	  All threads within same core share the execution unit of the
 	  core and HW scheduler round robin between them.
+
+config EZNPS_MEM_ERROR_ALIGN
+       bool "ARC-EZchip Memory error as an exception"
+       depends on EZNPS_MTM_EXT
+       default n
+       help
+	  On the real chip of the NPS, user memory errors are handled
+	  as a machine check exception, which is fatal, whereas on
+	  simulator platform for NPS, is handled as a Level 2 interrupt
+	  (just a stock ARC700) which is recoverable. This option makes
+	  simulator behave like hardware.
+
+config EZNPS_SHARED_AUX_REGS
+	bool "ARC-EZchip Shared Auxiliary Registers Per Core"
+	depends on ARC_PLAT_EZNPS
+	default y
+	help
+	  On the real chip of the NPS, auxiliary registers are shared between
+	  all the cpus of the core, whereas on simulator platform for NPS,
+	  each cpu has a different set of auxiliary registers. Configuration
+	  should be unset if auxiliary registers are not shared between the cpus
+	  of the core, so there will be a need to initialize them per cpu.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
index 21091b1..8d43717 100644
--- a/arch/arc/plat-eznps/Makefile
+++ b/arch/arc/plat-eznps/Makefile
@@ -2,6 +2,6 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := entry.o platform.o
+obj-y := entry.o platform.o ctop.o
 obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c
new file mode 100644
index 0000000..030bcd0
--- /dev/null
+++ b/arch/arc/plat-eznps/ctop.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <plat/ctop.h>
+
+void dp_save_restore(struct task_struct *prev, struct task_struct *next)
+{
+	struct eznps_dp *prev_task_dp = &prev->thread.dp;
+	struct eznps_dp *next_task_dp = &next->thread.dp;
+
+	/* Here we save all Data Plane related auxiliary registers */
+	prev_task_dp->eflags = read_aux_reg(CTOP_AUX_EFLAGS);
+	write_aux_reg(CTOP_AUX_EFLAGS, next_task_dp->eflags);
+
+	prev_task_dp->gpa1 = read_aux_reg(CTOP_AUX_GPA1);
+	write_aux_reg(CTOP_AUX_GPA1, next_task_dp->gpa1);
+}
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
index 328261c..091c92c 100644
--- a/arch/arc/plat-eznps/entry.S
+++ b/arch/arc/plat-eznps/entry.S
@@ -27,7 +27,7 @@
 	.align 1024	; HW requierment for restart first PC
 
 ENTRY(res_service)
-#ifdef CONFIG_EZNPS_MTM_EXT
+#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
 	; There is no work for HW thread id != 0
 	lr	r3, [CTOP_AUX_THREAD_ID]
 	cmp	r3, 0
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
index ee2e32d..0c7d110 100644
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ b/arch/arc/plat-eznps/include/plat/ctop.h
@@ -39,6 +39,7 @@
 #define CTOP_AUX_LOGIC_CORE_ID			(CTOP_AUX_BASE + 0x018)
 #define CTOP_AUX_MT_CTRL			(CTOP_AUX_BASE + 0x020)
 #define CTOP_AUX_HW_COMPLY			(CTOP_AUX_BASE + 0x024)
+#define CTOP_AUX_DPC				(CTOP_AUX_BASE + 0x02C)
 #define CTOP_AUX_LPC				(CTOP_AUX_BASE + 0x030)
 #define CTOP_AUX_EFLAGS				(CTOP_AUX_BASE + 0x080)
 #define CTOP_AUX_IACK				(CTOP_AUX_BASE + 0x088)
@@ -46,6 +47,7 @@
 #define CTOP_AUX_UDMC				(CTOP_AUX_BASE + 0x300)
 
 /* EZchip core instructions */
+#define CTOP_INST_HWSCHD_WFT_IE12		0x3E6F7344
 #define CTOP_INST_HWSCHD_OFF_R4			0x3C6F00BF
 #define CTOP_INST_HWSCHD_RESTORE_R4		0x3E6F7103
 #define CTOP_INST_SCHD_RW			0x3E6F7004
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
index aaaaffd..2388de3 100644
--- a/arch/arc/plat-eznps/mtm.c
+++ b/arch/arc/plat-eznps/mtm.c
@@ -21,10 +21,22 @@
 #include <plat/mtm.h>
 #include <plat/smp.h>
 
-#define MT_CTRL_HS_CNT		0xFF
+#define MT_HS_CNT_MIN		0x01
+#define MT_HS_CNT_MAX		0xFF
 #define MT_CTRL_ST_CNT		0xF
 #define NPS_NUM_HW_THREADS	0x10
 
+static int mtm_hs_ctr = MT_HS_CNT_MAX;
+
+#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN
+int do_memory_error(unsigned long address, struct pt_regs *regs)
+{
+	die("Invalid Mem Access", regs, address);
+
+	return 1;
+}
+#endif
+
 static void mtm_init_nat(int cpu)
 {
 	struct nps_host_reg_mtm_cfg mtm_cfg;
@@ -98,6 +110,18 @@ void mtm_enable_core(unsigned int cpu)
 	int i;
 	struct nps_host_reg_aux_mt_ctrl mt_ctrl;
 	struct nps_host_reg_mtm_cfg mtm_cfg;
+	struct nps_host_reg_aux_dpc dpc;
+
+	/*
+	 * Initializing dpc register in each CPU.
+	 * Overwriting the init value of the DPC
+	 * register so that CMEM and FMT virtual address
+	 * spaces are accessible, and Data Plane HW
+	 * facilities are enabled.
+	 */
+	dpc.ien = 1;
+	dpc.men = 1;
+	write_aux_reg(CTOP_AUX_DPC, dpc.value);
 
 	if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
 		return;
@@ -118,9 +142,7 @@ void mtm_enable_core(unsigned int cpu)
 	/* Enable HW schedule, stall counter, mtm */
 	mt_ctrl.value = 0;
 	mt_ctrl.hsen = 1;
-	mt_ctrl.hs_cnt = MT_CTRL_HS_CNT;
-	mt_ctrl.sten = 1;
-	mt_ctrl.st_cnt = MT_CTRL_ST_CNT;
+	mt_ctrl.hs_cnt = mtm_hs_ctr;
 	mt_ctrl.mten = 1;
 	write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
 
@@ -131,3 +153,23 @@ void mtm_enable_core(unsigned int cpu)
 	 */
 	cpu_relax();
 }
+
+/* Verify and set the value of the mtm hs counter */
+static int __init set_mtm_hs_ctr(char *ctr_str)
+{
+	long hs_ctr;
+	int ret;
+
+	ret = kstrtol(ctr_str, 0, &hs_ctr);
+
+	if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
+		pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n",
+		       hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX);
+		return -EINVAL;
+	}
+
+	mtm_hs_ctr = hs_ctr;
+
+	return 0;
+}
+early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr);
diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
new file mode 100644
index 0000000..5a6ed5a
--- /dev/null
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -0,0 +1,9 @@
+# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_SOC_HSDK
+       bool "ARC HS Development Kit SOC"
diff --git a/arch/arc/plat-hsdk/Makefile b/arch/arc/plat-hsdk/Makefile
new file mode 100644
index 0000000..9a50c51
--- /dev/null
+++ b/arch/arc/plat-hsdk/Makefile
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y := platform.o
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
new file mode 100644
index 0000000..a2e7fd1
--- /dev/null
+++ b/arch/arc/plat-hsdk/platform.c
@@ -0,0 +1,66 @@
+/*
+ * ARC HSDK Platform support code
+ *
+ * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/arcregs.h>
+#include <asm/io.h>
+#include <asm/mach_desc.h>
+
+#define ARC_CCM_UNUSED_ADDR	0x60000000
+
+static void __init hsdk_init_per_cpu(unsigned int cpu)
+{
+	/*
+	 * By default ICCM is mapped to 0x7z while this area is used for
+	 * kernel virtual mappings, so move it to currently unused area.
+	 */
+	if (cpuinfo_arc700[cpu].iccm.sz)
+		write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR);
+
+	/*
+	 * By default DCCM is mapped to 0x8z while this area is used by kernel,
+	 * so move it to currently unused area.
+	 */
+	if (cpuinfo_arc700[cpu].dccm.sz)
+		write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR);
+}
+
+#define ARC_PERIPHERAL_BASE	0xf0000000
+#define CREG_BASE		(ARC_PERIPHERAL_BASE + 0x1000)
+#define CREG_PAE		(CREG_BASE + 0x180)
+#define CREG_PAE_UPDATE		(CREG_BASE + 0x194)
+
+static void __init hsdk_init_early(void)
+{
+	/*
+	 * PAE remapping for DMA clients does not work due to an RTL bug, so
+	 * CREG_PAE register must be programmed to all zeroes, otherwise it
+	 * will cause problems with DMA to/from peripherals even if PAE40 is
+	 * not used.
+	 */
+
+	/* Default is 1, which means "PAE offset = 4GByte" */
+	writel_relaxed(0, (void __iomem *) CREG_PAE);
+
+	/* Really apply settings made above */
+	writel(1, (void __iomem *) CREG_PAE_UPDATE);
+}
+
+static const char *hsdk_compat[] __initconst = {
+	"snps,hsdk",
+	NULL,
+};
+
+MACHINE_START(SIMULATION, "hsdk")
+	.dt_compat	= hsdk_compat,
+	.init_early     = hsdk_init_early,
+	.init_per_cpu	= hsdk_init_per_cpu,
+MACHINE_END
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index ebf020b..c878145 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -227,7 +227,6 @@
 
 #define HSR_DABT_S1PTW		(_AC(1, UL) << 7)
 #define HSR_DABT_CM		(_AC(1, UL) << 8)
-#define HSR_DABT_EA		(_AC(1, UL) << 9)
 
 #define kvm_arm_exception_type	\
 	{0, "RESET" }, 		\
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9a8a45a..98089ff 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
 	return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
 }
 
-static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
-{
-	return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA;
-}
-
 static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
@@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
 
+static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
+{
+	switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+	case FSC_SEA:
+	case FSC_SEA_TTW0:
+	case FSC_SEA_TTW1:
+	case FSC_SEA_TTW2:
+	case FSC_SEA_TTW3:
+	case FSC_SECC:
+	case FSC_SECC_TTW0:
+	case FSC_SECC_TTW1:
+	case FSC_SECC_TTW2:
+	case FSC_SECC_TTW3:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 54442e3..cf8bf6b 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
 		trace_kvm_wfx(*vcpu_pc(vcpu), true);
 		vcpu->stat.wfe_exit_stat++;
-		kvm_vcpu_on_spin(vcpu);
+		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
 	} else {
 		trace_kvm_wfx(*vcpu_pc(vcpu), false);
 		vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 69c56f7..5b78ce1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -238,8 +238,10 @@
 		linux,pci-domain = <0>;
 		max-link-speed = <1>;
 		msi-map = <0x0 &its 0x0 0x1000>;
-		phys = <&pcie_phy>;
-		phy-names = "pcie-phy";
+		phys = <&pcie_phy 0>, <&pcie_phy 1>,
+		       <&pcie_phy 2>, <&pcie_phy 3>;
+		phy-names = "pcie-phy-0", "pcie-phy-1",
+			    "pcie-phy-2", "pcie-phy-3";
 		ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x1e00000
 			  0x81000000 0x0 0xfbe00000 0x0 0xfbe00000 0x0 0x100000>;
 		resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
@@ -1295,7 +1297,7 @@
 			compatible = "rockchip,rk3399-pcie-phy";
 			clocks = <&cru SCLK_PCIEPHY_REF>;
 			clock-names = "refclk";
-			#phy-cells = <0>;
+			#phy-cells = <1>;
 			resets = <&cru SRST_PCIEPHY>;
 			reset-names = "phy";
 			status = "disabled";
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fe39e68..e5df3fc 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
 	return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
 }
 
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
-{
-	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA);
-}
-
 static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
@@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+	switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+	case FSC_SEA:
+	case FSC_SEA_TTW0:
+	case FSC_SEA_TTW1:
+	case FSC_SEA_TTW2:
+	case FSC_SEA_TTW3:
+	case FSC_SECC:
+	case FSC_SECC_TTW0:
+	case FSC_SECC_TTW1:
+	case FSC_SECC_TTW2:
+	case FSC_SECC_TTW3:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
 	u32 esr = kvm_vcpu_get_hsr(vcpu);
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index e2b7e4f..0e2ea1c 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -22,23 +22,6 @@
 #include <linux/pci-ecam.h>
 #include <linux/slab.h>
 
-/*
- * Called after each bus is probed, but before its children are examined
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-	/* nothing to do, expected to be removed in the future */
-}
-
-/*
- * We don't have to worry about legacy ISA devices, so nothing to do here
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-
 #ifdef CONFIG_ACPI
 /*
  * Try to assign the IRQ number when probing a new device
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 17d8a16..7debb74 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
 		vcpu->stat.wfe_exit_stat++;
-		kvm_vcpu_on_spin(vcpu);
+		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
 		vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 116786d..c77d508 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
 static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			    const struct sys_reg_desc *r, u8 apr)
 {
-	struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
 	u8 idx = r->Op2 & 3;
 
-	/*
-	 * num_pri_bits are initialized with HW supported values.
-	 * We can rely safely on num_pri_bits even if VM has not
-	 * restored ICC_CTLR_EL1 before restoring APnR registers.
-	 */
-	switch (vgic_v3_cpu->num_pri_bits) {
-	case 7:
-		vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-		break;
-	case 6:
-		if (idx > 1)
-			goto err;
-		vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-		break;
-	default:
-		if (idx > 0)
-			goto err;
-		vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-	}
+	if (idx > vgic_v3_max_apr_idx(vcpu))
+		goto err;
 
+	vgic_v3_access_apr_reg(vcpu, p, apr, idx);
 	return true;
 err:
 	if (!p->is_write)
diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c
index 394c2a73..5cc622c 100644
--- a/arch/cris/arch-v32/drivers/pci/bios.c
+++ b/arch/cris/arch-v32/drivers/pci/bios.c
@@ -2,10 +2,6 @@
 #include <linux/kernel.h>
 #include <hwregs/intr_vect.h>
 
-void pcibios_fixup_bus(struct pci_bus *b)
-{
-}
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 4068bde..f5ec736 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -411,13 +411,6 @@ pcibios_disable_device (struct pci_dev *dev)
 		acpi_pci_irq_disable(dev);
 }
 
-resource_size_t
-pcibios_align_resource (void *data, const struct resource *res,
-		        resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-
 /**
  * ia64_pci_get_legacy_mem - generic legacy mem routine
  * @bus: bus to get legacy memory base address for
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index 6a640be..3097fa2 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -243,6 +243,13 @@ static struct resource mcf_pci_io = {
 	.flags	= IORESOURCE_IO,
 };
 
+static struct resource busn_resource = {
+	.name	= "PCI busn",
+	.start	= 0,
+	.end	= 255,
+	.flags	= IORESOURCE_BUS,
+};
+
 /*
  * Interrupt mapping and setting.
  */
@@ -258,6 +265,13 @@ static int mcf_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 
 static int __init mcf_pci_init(void)
 {
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return -ENOMEM;
+
 	pr_info("ColdFire: PCI bus initialization...\n");
 
 	/* Reset the external PCI bus */
@@ -312,14 +326,28 @@ static int __init mcf_pci_init(void)
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	schedule_timeout(msecs_to_jiffies(200));
 
-	rootbus = pci_scan_bus(0, &mcf_pci_ops, NULL);
-	if (!rootbus)
-		return -ENODEV;
+
+	pci_add_resource(&bridge->windows, &ioport_resource);
+	pci_add_resource(&bridge->windows, &iomem_resource);
+	pci_add_resource(&bridge->windows, &busn_resource);
+	bridge->dev.parent = NULL;
+	bridge->sysdata = NULL;
+	bridge->busnr = 0;
+	bridge->ops = &mcf_pci_ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = mcf_pci_map_irq;
+
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return ret;
+	}
+
+	rootbus = bridge->bus;
 
 	rootbus->resource[0] = &mcf_pci_io;
 	rootbus->resource[1] = &mcf_pci_mem;
 
-	pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq);
 	pci_bus_size_bridges(rootbus);
 	pci_bus_assign_resources(rootbus);
 	pci_bus_add_devices(rootbus);
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index efd4983..114b934 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -81,9 +81,6 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 
 #define HAVE_ARCH_PCI_RESOURCE_TO_USER
 
-extern void pcibios_setup_bus_devices(struct pci_bus *bus);
-extern void pcibios_setup_bus_self(struct pci_bus *bus);
-
 /* This part of code was originally in xilinx-pci.h */
 #ifdef CONFIG_PCI_XILINX
 extern void __init xilinx_pci_init(void);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 180f475..ae79e86 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -678,144 +678,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
 
-/* This function tries to figure out if a bridge resource has been initialized
- * by the firmware or not. It doesn't have to be absolutely bullet proof, but
- * things go more smoothly when it gets it right. It should covers cases such
- * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
- */
-static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
-						 struct resource *res)
-{
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pci_dev *dev = bus->self;
-	resource_size_t offset;
-	u16 command;
-	int i;
-
-	/* Job is a bit different between memory and IO */
-	if (res->flags & IORESOURCE_MEM) {
-		/* If the BAR is non-0 (res != pci_mem_offset) then it's
-		 * probably been initialized by somebody
-		 */
-		if (res->start != hose->pci_mem_offset)
-			return 0;
-
-		/* The BAR is 0, let's check if memory decoding is enabled on
-		 * the bridge. If not, we consider it unassigned
-		 */
-		pci_read_config_word(dev, PCI_COMMAND, &command);
-		if ((command & PCI_COMMAND_MEMORY) == 0)
-			return 1;
-
-		/* Memory decoding is enabled and the BAR is 0. If any of
-		 * the bridge resources covers that starting address (0 then
-		 * it's good enough for us for memory
-		 */
-		for (i = 0; i < 3; i++) {
-			if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
-			   hose->mem_resources[i].start == hose->pci_mem_offset)
-				return 0;
-		}
-
-		/* Well, it starts at 0 and we know it will collide so we may as
-		 * well consider it as unassigned. That covers the Apple case.
-		 */
-		return 1;
-	} else {
-		/* If the BAR is non-0, then we consider it assigned */
-		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-		if (((res->start - offset) & 0xfffffffful) != 0)
-			return 0;
-
-		/* Here, we are a bit different than memory as typically IO
-		 * space starting at low addresses -is- valid. What we do
-		 * instead if that we consider as unassigned anything that
-		 * doesn't have IO enabled in the PCI command register,
-		 * and that's it.
-		 */
-		pci_read_config_word(dev, PCI_COMMAND, &command);
-		if (command & PCI_COMMAND_IO)
-			return 0;
-
-		/* It's starting at 0 and IO is disabled in the bridge, consider
-		 * it unassigned
-		 */
-		return 1;
-	}
-}
-
-/* Fixup resources of a PCI<->PCI bridge */
-static void pcibios_fixup_bridge(struct pci_bus *bus)
-{
-	struct resource *res;
-	int i;
-
-	struct pci_dev *dev = bus->self;
-
-	pci_bus_for_each_resource(bus, res, i) {
-		if (!res)
-			continue;
-		if (!res->flags)
-			continue;
-		if (i >= 3 && bus->self->transparent)
-			continue;
-
-		pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
-			 pci_name(dev), i,
-			 (unsigned long long)res->start,
-			 (unsigned long long)res->end,
-			 (unsigned int)res->flags);
-
-		/* Try to detect uninitialized P2P bridge resources,
-		 * and clear them out so they get re-assigned later
-		 */
-		if (pcibios_uninitialized_bridge_resource(bus, res)) {
-			res->flags = 0;
-			pr_debug("PCI:%s            (unassigned)\n",
-								pci_name(dev));
-		} else {
-			pr_debug("PCI:%s            %016llx-%016llx\n",
-				 pci_name(dev),
-				 (unsigned long long)res->start,
-				 (unsigned long long)res->end);
-		}
-	}
-}
-
-void pcibios_setup_bus_self(struct pci_bus *bus)
-{
-	/* Fix up the bus resources for P2P bridges */
-	if (bus->self != NULL)
-		pcibios_fixup_bridge(bus);
-}
-
-void pcibios_setup_bus_devices(struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	pr_debug("PCI: Fixup bus devices %d (%s)\n",
-		 bus->number, bus->self ? pci_name(bus->self) : "PHB");
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		/* Setup OF node pointer in archdata */
-		dev->dev.of_node = pci_device_to_OF_node(dev);
-
-		/* Fixup NUMA node as it may not be setup yet by the generic
-		 * code and is needed by the DMA init
-		 */
-		set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
-
-		/* Read default IRQs and fixup if necessary */
-		dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
-	}
-}
-
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-	/* nothing to do */
-}
-EXPORT_SYMBOL(pcibios_fixup_bus);
-
 /*
  * We need to avoid collisions with `mirrored' VGA ports
  * and other strange ISA hardware, so we always want the
@@ -829,13 +691,6 @@ EXPORT_SYMBOL(pcibios_fixup_bus);
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
 int pcibios_add_device(struct pci_dev *dev)
 {
 	dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d4b2ad1..bce2a64 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return !!(vcpu->arch.pending_exceptions);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index 174575a..fc77260 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -78,6 +78,12 @@ static void pcibios_scanbus(struct pci_controller *hose)
 	static int need_domain_info;
 	LIST_HEAD(resources);
 	struct pci_bus *bus;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return;
 
 	if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
 		next_busno = (*hose->get_busno)();
@@ -87,18 +93,24 @@ static void pcibios_scanbus(struct pci_controller *hose)
 	pci_add_resource_offset(&resources,
 				hose->io_resource, hose->io_offset);
 	pci_add_resource(&resources, hose->busn_resource);
-	bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
-				&resources);
-	hose->bus = bus;
+	list_splice_init(&resources, &bridge->windows);
+	bridge->dev.parent = NULL;
+	bridge->sysdata = hose;
+	bridge->busnr = next_busno;
+	bridge->ops = hose->pci_ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = pcibios_map_irq;
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return;
+	}
+
+	hose->bus = bus = bridge->bus;
 
 	need_domain_info = need_domain_info || pci_domain_nr(bus);
 	set_pci_need_domain_info(hose, need_domain_info);
 
-	if (!bus) {
-		pci_free_resource_list(&resources);
-		return;
-	}
-
 	next_busno = bus->busn_res.end + 1;
 	/* Don't allow 8-bit bus number overflow inside the hose -
 	   reserve some space for bridges. */
@@ -224,8 +236,6 @@ static int __init pcibios_init(void)
 	list_for_each_entry(hose, &controllers, list)
 		pcibios_scanbus(hose);
 
-	pci_fixup_irqs(pci_common_swizzle, pcibios_map_irq);
-
 	pci_initialized = 1;
 
 	return 0;
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index f28d21c..508275b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -104,6 +104,7 @@
 #define HPTE_R_C		ASM_CONST(0x0000000000000080)
 #define HPTE_R_R		ASM_CONST(0x0000000000000100)
 #define HPTE_R_KEY_LO		ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY		(HPTE_R_KEY_LO | HPTE_R_KEY_HI)
 
 #define HPTE_V_1TB_SEG		ASM_CONST(0x4000000000000000)
 #define HPTE_V_VRMA_MASK	ASM_CONST(0x4001ffffff000000)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 67075e0..7c62967 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1941,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
 	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
 	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
 	if (ret < 0) {
+		kfree(ctx);
 		kvm_put_kvm(kvm);
 		return ret;
 	}
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 53766e2..8f2da8b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
 {
 	struct kvmppc_spapr_tce_table *stt = filp->private_data;
 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+	struct kvm *kvm = stt->kvm;
 
+	mutex_lock(&kvm->lock);
 	list_del_rcu(&stt->list);
+	mutex_unlock(&kvm->lock);
 
 	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
 		WARN_ON(!kref_read(&stit->kref));
@@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	unsigned long npages, size;
 	int ret = -ENOMEM;
 	int i;
-	int fd = -1;
 
 	if (!args->size)
 		return -EINVAL;
@@ -328,11 +330,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 			goto fail;
 	}
 
-	ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				    stt, O_RDWR | O_CLOEXEC);
-	if (ret < 0)
-		goto fail;
-
 	mutex_lock(&kvm->lock);
 
 	/* Check this LIOBN hasn't been previously allocated */
@@ -344,17 +341,19 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		}
 	}
 
-	if (!ret) {
+	if (!ret)
+		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				       stt, O_RDWR | O_CLOEXEC);
+
+	if (ret >= 0) {
 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
 		kvm_get_kvm(kvm);
 	}
 
 	mutex_unlock(&kvm->lock);
 
-	if (!ret)
-		return fd;
-
-	put_unused_fd(fd);
+	if (ret >= 0)
+		return ret;
 
  fail:
 	for (i = 0; i < npages; i++)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ebcf97c..18e974a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 
 	switch (subfunc) {
 	case H_VPA_REG_VPA:		/* register VPA */
-		if (len < sizeof(struct lppaca))
+		/*
+		 * The size of our lppaca is 1kB because of the way we align
+		 * it for the guest to avoid crossing a 4kB boundary. We only
+		 * use 640 bytes of the structure though, so we should accept
+		 * clients that set a size of 640.
+		 */
+		if (len < 640)
 			break;
 		vpap = &tvcpu->arch.vpa;
 		err = 0;
@@ -3336,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
 	if (radix_enabled())
 		return -EINVAL;
 
+	/*
+	 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+	 * POWER7 doesn't support keys for instruction accesses,
+	 * POWER8 and POWER9 do.
+	 */
+	info->data_keys = 32;
+	info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
+
 	info->flags = KVM_PPC_PAGE_SIZES_REAL;
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		info->flags |= KVM_PPC_1T_SEGMENTS;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fedb013..4efe364 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	if (!realmode)
 		local_irq_restore(irq_flags);
 
-	ptel &= ~(HPTE_R_PP0 - psize);
+	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
 	ptel |= pa;
 
 	if (pa)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2259b6c..663a4a8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -982,7 +982,7 @@
 #ifdef CONFIG_KVM_XICS
 	/* We are entering the guest on that thread, push VCPU to XIVE */
 	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
-	cmpldi	cr0, r10, r0
+	cmpldi	cr0, r10, 0
 	beq	no_xive
 	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
 	li	r9, TM_QW1_OS
@@ -1286,7 +1286,8 @@
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
 	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,0
+	EXTEND_HDEC(r3)
+	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
 2:
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 32fdab5..f9f6468 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
 	if (err)
 		goto free_vcpu;
 
-	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
+		err = -ENOMEM;
 		goto uninit_vcpu;
+	}
 
 	err = kvmppc_e500_tlb_init(vcpu_e500);
 	if (err)
 		goto uninit_id;
 
 	vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-	if (!vcpu->arch.shared)
+	if (!vcpu->arch.shared) {
+		err = -ENOMEM;
 		goto uninit_tlb;
+	}
 
 	return vcpu;
 
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index f48a0c2..d0b6b57 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
 		goto uninit_vcpu;
 
 	vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-	if (!vcpu->arch.shared)
+	if (!vcpu->arch.shared) {
+		err = -ENOMEM;
 		goto uninit_tlb;
+	}
 
 	return vcpu;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1a75c0b..3480faa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 	return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a409d59..51375e7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -226,7 +226,9 @@ struct kvm_s390_sie_block {
 #define ECB3_RI  0x01
 	__u8    ecb3;			/* 0x0063 */
 	__u32	scaol;			/* 0x0064 */
-	__u8	reserved68[4];		/* 0x0068 */
+	__u8	reserved68;		/* 0x0068 */
+	__u8    epdx;			/* 0x0069 */
+	__u8    reserved6a[2];		/* 0x006a */
 	__u32	todpr;			/* 0x006c */
 	__u8	reserved70[16];		/* 0x0070 */
 	__u64	mso;			/* 0x0080 */
@@ -265,6 +267,7 @@ struct kvm_s390_sie_block {
 	__u64	cbrlo;			/* 0x01b8 */
 	__u8	reserved1c0[8];		/* 0x01c0 */
 #define ECD_HOSTREGMGMT	0x20000000
+#define ECD_MEF		0x08000000
 	__u32	ecd;			/* 0x01c8 */
 	__u8	reserved1cc[18];	/* 0x01cc */
 	__u64	pp;			/* 0x01de */
@@ -739,6 +742,7 @@ struct kvm_arch{
 	struct kvm_s390_cpu_model model;
 	struct kvm_s390_crypto crypto;
 	struct kvm_s390_vsie vsie;
+	u8 epdx;
 	u64 epoch;
 	struct kvm_s390_migration_state *migration_state;
 	/* subset of available cpu features enabled by user space */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index ca21b28..22b0f49 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -15,6 +15,6 @@
 #define ESSA_SET_STABLE_IF_RESIDENT	6
 #define ESSA_SET_STABLE_NODAT		7
 
-#define ESSA_MAX	ESSA_SET_STABLE_IF_RESIDENT
+#define ESSA_MAX	ESSA_SET_STABLE_NODAT
 
 #endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 69d09c3..cd7359e 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_HIGH		1
+#define KVM_S390_VM_TOD_EXT		2
+
+struct kvm_s390_vm_tod_clock {
+	__u8  epoch_idx;
+	__u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index e4d3609..d93a2c0 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
 	vcpu->stat.diagnose_44++;
-	kvm_vcpu_on_spin(vcpu);
+	kvm_vcpu_on_spin(vcpu, true);
 	return 0;
 }
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index c2e0ddc..bcbd866 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b)
 		return (addr >= a) && (addr <= b);
 	else
 		/* "overflowing" interval */
-		return (addr <= a) && (addr >= b);
+		return (addr >= a) || (addr <= b);
 }
 
 #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a619dda..a832ad0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 	struct kvm_s390_mchk_info *mchk;
 	union mci mci;
 	__u64 cr14 = 0;         /* upper bits are not used */
+	int rc;
 
 	mci.val = mcck_info->mcic;
 	if (mci.sr)
@@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 	if (mci.ck) {
 		/* Inject the floating machine check */
 		inti.type = KVM_S390_MCHK;
-		WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+		rc = __inject_vm(vcpu->kvm, &inti);
 	} else {
 		/* Inject the machine check to specified vcpu */
 		irq.type = KVM_S390_MCHK;
-		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+		rc = kvm_s390_inject_vcpu(vcpu, &irq);
 	}
+	WARN_ON_ONCE(rc);
 }
 
 int kvm_set_routing_entry(struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index af09d34..40d0a1a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+struct kvm_s390_tod_clock_ext {
+	__u8 epoch_idx;
+	__u64 tod;
+	__u8 reserved[7];
+} __packed;
+
 /* allow nested virtualization in KVM (if enabled by user space) */
 static int nested;
 module_param(nested, int, S_IRUGO);
@@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
 	return 0;
 }
 
+static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_tod_clock gtod;
+
+	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+		return -EFAULT;
+
+	if (test_kvm_facility(kvm, 139))
+		kvm_s390_set_tod_clock_ext(kvm, &gtod);
+	else if (gtod.epoch_idx == 0)
+		kvm_s390_set_tod_clock(kvm, gtod.tod);
+	else
+		return -EINVAL;
+
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+		gtod.epoch_idx, gtod.tod);
+
+	return 0;
+}
+
 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	u8 gtod_high;
@@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 		return -EINVAL;
 
 	switch (attr->attr) {
+	case KVM_S390_VM_TOD_EXT:
+		ret = kvm_s390_set_tod_ext(kvm, attr);
+		break;
 	case KVM_S390_VM_TOD_HIGH:
 		ret = kvm_s390_set_tod_high(kvm, attr);
 		break;
@@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
+static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
+					struct kvm_s390_vm_tod_clock *gtod)
+{
+	struct kvm_s390_tod_clock_ext htod;
+
+	preempt_disable();
+
+	get_tod_clock_ext((char *)&htod);
+
+	gtod->tod = htod.tod + kvm->arch.epoch;
+	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
+
+	if (gtod->tod < htod.tod)
+		gtod->epoch_idx += 1;
+
+	preempt_enable();
+}
+
+static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_tod_clock gtod;
+
+	memset(&gtod, 0, sizeof(gtod));
+
+	if (test_kvm_facility(kvm, 139))
+		kvm_s390_get_tod_clock_ext(kvm, &gtod);
+	else
+		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
+
+	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+		return -EFAULT;
+
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
+		gtod.epoch_idx, gtod.tod);
+	return 0;
+}
+
 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	u8 gtod_high = 0;
@@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 		return -EINVAL;
 
 	switch (attr->attr) {
+	case KVM_S390_VM_TOD_EXT:
+		ret = kvm_s390_get_tod_ext(kvm, attr);
+		break;
 	case KVM_S390_VM_TOD_HIGH:
 		ret = kvm_s390_get_tod_high(kvm, attr);
 		break;
@@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 		if (r < 0)
 			pgstev = 0;
 		/* save the value */
-		res[i++] = (pgstev >> 24) & 0x3;
+		res[i++] = (pgstev >> 24) & 0x43;
 		/*
 		 * if the next bit is too far away, stop.
 		 * if we reached the previous "next", find the next one
@@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 
 		pgstev = bits[i];
 		pgstev = pgstev << 24;
-		mask &= _PGSTE_GPS_USAGE_MASK;
+		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 
+	/* we are always in czam mode - even on pre z14 machines */
+	set_kvm_facility(kvm->arch.model.fac_mask, 138);
+	set_kvm_facility(kvm->arch.model.fac_list, 138);
+	/* we emulate STHYI in kvm */
 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
 	set_kvm_facility(kvm->arch.model.fac_list, 74);
+	if (MACHINE_HAS_TLB_GUEST) {
+		set_kvm_facility(kvm->arch.model.fac_mask, 147);
+		set_kvm_facility(kvm->arch.model.fac_list, 147);
+	}
 
 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
@@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->eca |= ECA_VX;
 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
 	}
+	if (test_kvm_facility(vcpu->kvm, 139))
+		vcpu->arch.sie_block->ecd |= ECD_MEF;
+
 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
 					| SDNXC;
 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
@@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
+}
+
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
@@ -2855,6 +2940,35 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+				 const struct kvm_s390_vm_tod_clock *gtod)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_s390_tod_clock_ext htod;
+	int i;
+
+	mutex_lock(&kvm->lock);
+	preempt_disable();
+
+	get_tod_clock_ext((char *)&htod);
+
+	kvm->arch.epoch = gtod->tod - htod.tod;
+	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+
+	if (kvm->arch.epoch > gtod->tod)
+		kvm->arch.epdx -= 1;
+
+	kvm_s390_vcpu_block_all(kvm);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
+	}
+
+	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
+	mutex_unlock(&kvm->lock);
+}
+
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
 {
 	struct kvm_vcpu *vcpu;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6fedc8b..9f8fdd7 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 int handle_sthyi(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+				 const struct kvm_s390_vm_tod_clock *gtod);
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 785ad028..c954ac4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 		if (pgstev & _PGSTE_GPS_ZERO)
 			res |= 1;
 	}
+	if (pgstev & _PGSTE_GPS_NODAT)
+		res |= 0x20;
 	vcpu->run->s.regs.gprs[r1] = res;
 	/*
 	 * It is possible that all the normal 511 slots were full, in which case
@@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	/* Check for invalid operation request code */
 	orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
-	if (orc > ESSA_MAX)
+	/* ORCs 0-6 are always valid */
+	if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
+						: ESSA_SET_STABLE_IF_RESIDENT))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	if (likely(!vcpu->kvm->arch.migration_state)) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1a252f5..9d592ef 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
+			   u64 *status_reg)
 {
-	int rc;
 	unsigned int i;
 	struct kvm_vcpu *v;
+	bool all_stopped = true;
 
-	switch (parameter & 0xff) {
-	case 0:
-		rc = SIGP_CC_NOT_OPERATIONAL;
-		break;
-	case 1:
-	case 2:
-		kvm_for_each_vcpu(i, v, vcpu->kvm) {
-			v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
-			kvm_clear_async_pf_completion_queue(v);
-		}
-
-		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-		break;
-	default:
-		rc = -EOPNOTSUPP;
+	kvm_for_each_vcpu(i, v, vcpu->kvm) {
+		if (v == vcpu)
+			continue;
+		if (!is_vcpu_stopped(v))
+			all_stopped = false;
 	}
-	return rc;
+
+	*status_reg &= 0xffffffff00000000UL;
+
+	/* Reject set arch order, with czam we're always in z/Arch mode. */
+	*status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
+					SIGP_STATUS_INCORRECT_STATE);
+	return SIGP_CC_STATUS_STORED;
 }
 
 static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
@@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 	switch (order_code) {
 	case SIGP_SET_ARCHITECTURE:
 		vcpu->stat.instruction_sigp_arch++;
-		rc = __sigp_set_arch(vcpu, parameter);
+		rc = __sigp_set_arch(vcpu, parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
 		break;
 	default:
 		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index a2e5c24..395926b 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -436,14 +436,6 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 	if (addr & ~PAGE_MASK)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	/*
-	 * If the page has not yet been faulted in, we want to do that
-	 * now and not after all the expensive calculations.
-	 */
-	r = write_guest(vcpu, addr, reg2, &cc, 1);
-	if (r)
-		return kvm_s390_inject_prog_cond(vcpu, r);
-
 	sctns = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!sctns)
 		return -ENOMEM;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ba8203e..b18b565 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		scb_s->eca |= scb_o->eca & ECA_IB;
 	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
 		scb_s->eca |= scb_o->eca & ECA_CEI;
+	/* Epoch Extension */
+	if (test_kvm_facility(vcpu->kvm, 139))
+		scb_s->ecd |= scb_o->ecd & ECD_MEF;
 
 	prepare_ibc(vcpu, vsie_page);
 	rc = shadow_crycb(vcpu, vsie_page);
@@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
 	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
-	struct mcck_volatile_info *mcck_info;
-	struct sie_page *sie_page;
 	int rc;
 
 	handle_last_fault(vcpu, vsie_page);
@@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	if (rc == -EINTR) {
 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
-		sie_page = container_of(scb_s, struct sie_page, sie_block);
-		mcck_info = &sie_page->mcck_info;
-		kvm_s390_reinject_machine_check(vcpu, mcck_info);
+		kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
 		return 0;
 	}
 
@@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
 	 */
 	preempt_disable();
 	scb_s->epoch += vcpu->kvm->arch.epoch;
+
+	if (scb_s->ecd & ECD_MEF) {
+		scb_s->epdx += vcpu->kvm->arch.epdx;
+		if (scb_s->epoch < vcpu->kvm->arch.epoch)
+			scb_s->epdx += 1;
+	}
+
 	preempt_enable();
 }
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4198a71..ae677f8 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -919,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 	case ESSA_GET_STATE:
 		break;
 	case ESSA_SET_STABLE:
-		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
 		pgstev |= _PGSTE_GPS_USAGE_STABLE;
 		break;
 	case ESSA_SET_UNUSED:
@@ -965,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 			pgstev |= _PGSTE_GPS_USAGE_STABLE;
 		}
 		break;
+	case ESSA_SET_STABLE_NODAT:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
+		break;
 	default:
 		/* we should never get here! */
 		break;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7b30af5..ddb9923 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -262,10 +262,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
 	return rc;
 }
 
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				       resource_size_t size,
 				       resource_size_t align)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 29d72bf..70dd8f1 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -83,6 +83,7 @@ static struct facility_def facility_defs[] = {
 			78, /* enhanced-DAT 2 */
 			130, /* instruction-execution-protection */
 			131, /* enhanced-SOP 2 and side-effect */
+			139, /* multiple epoch facility */
 			146, /* msa extension 8 */
 			-1  /* END */
 		}
diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c
index edc2fb7..3246788 100644
--- a/arch/sh/drivers/pci/fixups-cayman.c
+++ b/arch/sh/drivers/pci/fixups-cayman.c
@@ -5,7 +5,7 @@
 #include <cpu/irq.h>
 #include "pci-sh5.h"
 
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int result = -1;
 
diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index 1d1c5a2..9d597f7 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -76,7 +76,7 @@ static void gapspci_fixup_resources(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, gapspci_fixup_resources);
 
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	/*
 	 * The interrupt routing semantics here are quite trivial.
diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 57ed3f0..2c9b58f 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -15,7 +15,7 @@
 #include <linux/sh_intc.h>
 #include "pci-sh4.h"
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return evt2irq(0xa20) + slot;
 }
diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c
index eaddb56..358ac10 100644
--- a/arch/sh/drivers/pci/fixups-rts7751r2d.c
+++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c
@@ -20,18 +20,18 @@
 #define PCIMCR_MRSET_OFF	0xBFFFFFFF
 #define PCIMCR_RFSH_OFF		0xFFFFFFFB
 
-static u8 rts7751r2d_irq_tab[] __initdata = {
+static u8 rts7751r2d_irq_tab[] = {
 	IRQ_PCI_INTA,
 	IRQ_PCI_INTB,
 	IRQ_PCI_INTC,
 	IRQ_PCI_INTD,
 };
 
-static char lboxre2_irq_tab[] __initdata = {
+static char lboxre2_irq_tab[] = {
 	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
 };
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	if (mach_is_lboxre2())
 		return lboxre2_irq_tab[slot];
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index c0a015a..24e96df 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -22,7 +22,7 @@
 #define IRQ_INTD	evt2irq(0xa80)
 
 /* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char sdk7780_irq_tab[4][16] __initdata = {
+static char sdk7780_irq_tab[4][16] = {
 	/* INTA */
 	{ IRQ_INTA, IRQ_INTD, IRQ_INTC, IRQ_INTD, -1, -1, -1, -1, -1, -1,
 	  -1, -1, -1, -1, -1, -1 },
@@ -37,7 +37,7 @@ static char sdk7780_irq_tab[4][16] __initdata = {
 	  -1, -1, -1 },
 };
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
        return sdk7780_irq_tab[pin-1][slot];
 }
diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c
index 84a88ca..1cb8d0a 100644
--- a/arch/sh/drivers/pci/fixups-se7751.c
+++ b/arch/sh/drivers/pci/fixups-se7751.c
@@ -7,7 +7,7 @@
 #include <linux/sh_intc.h>
 #include "pci-sh4.h"
 
-int __init pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin)
 {
         switch (slot) {
         case 0: return evt2irq(0x3a0);
diff --git a/arch/sh/drivers/pci/fixups-sh03.c b/arch/sh/drivers/pci/fixups-sh03.c
index 16207be..55ac1ba 100644
--- a/arch/sh/drivers/pci/fixups-sh03.c
+++ b/arch/sh/drivers/pci/fixups-sh03.c
@@ -4,7 +4,7 @@
 #include <linux/pci.h>
 #include <linux/sh_intc.h>
 
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq;
 
diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c
index 6e33ba4..a931e59 100644
--- a/arch/sh/drivers/pci/fixups-snapgear.c
+++ b/arch/sh/drivers/pci/fixups-snapgear.c
@@ -19,7 +19,7 @@
 #include <linux/sh_intc.h>
 #include "pci-sh4.h"
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c
index bd1addb..a9d563e 100644
--- a/arch/sh/drivers/pci/fixups-titan.c
+++ b/arch/sh/drivers/pci/fixups-titan.c
@@ -19,7 +19,7 @@
 #include <mach/titan.h>
 #include "pci-sh4.h"
 
-static char titan_irq_tab[] __initdata = {
+static char titan_irq_tab[] = {
 	TITAN_IRQ_WAN,
 	TITAN_IRQ_LAN,
 	TITAN_IRQ_MPCIA,
@@ -27,7 +27,7 @@ static char titan_irq_tab[] __initdata = {
 	TITAN_IRQ_USB,
 };
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	int irq = titan_irq_tab[slot];
 
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c99ee28..5976a2c 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -39,8 +39,12 @@ static void pcibios_scanbus(struct pci_channel *hose)
 	LIST_HEAD(resources);
 	struct resource *res;
 	resource_size_t offset;
-	int i;
-	struct pci_bus *bus;
+	int i, ret;
+	struct pci_host_bridge *bridge;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return;
 
 	for (i = 0; i < hose->nr_resources; i++) {
 		res = hose->resources + i;
@@ -52,19 +56,26 @@ static void pcibios_scanbus(struct pci_channel *hose)
 		pci_add_resource_offset(&resources, res, offset);
 	}
 
-	bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
-				&resources);
-	hose->bus = bus;
+	list_splice_init(&resources, &bridge->windows);
+	bridge->dev.parent = NULL;
+	bridge->sysdata = hose;
+	bridge->busnr = next_busno;
+	bridge->ops = hose->pci_ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = pcibios_map_platform_irq;
+
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return;
+	}
+
+	hose->bus = bridge->bus;
 
 	need_domain_info = need_domain_info || hose->index;
 	hose->need_domain_info = need_domain_info;
 
-	if (!bus) {
-		pci_free_resource_list(&resources);
-		return;
-	}
-
-	next_busno = bus->busn_res.end + 1;
+	next_busno = hose->bus->busn_res.end + 1;
 	/* Don't allow 8-bit bus number overflow inside the hose -
 	   reserve some space for bridges. */
 	if (next_busno > 224) {
@@ -72,9 +83,9 @@ static void pcibios_scanbus(struct pci_channel *hose)
 		need_domain_info = 1;
 	}
 
-	pci_bus_size_bridges(bus);
-	pci_bus_assign_resources(bus);
-	pci_bus_add_devices(bus);
+	pci_bus_size_bridges(hose->bus);
+	pci_bus_assign_resources(hose->bus);
+	pci_bus_add_devices(hose->bus);
 }
 
 /*
@@ -144,8 +155,6 @@ static int __init pcibios_init(void)
 	for (hose = hose_head; hose; hose = hose->next)
 		pcibios_scanbus(hose);
 
-	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
-
 	dma_debug_add_bus(&pci_bus_type);
 
 	pci_initialized = 1;
@@ -155,14 +164,6 @@ static int __init pcibios_init(void)
 subsys_initcall(pcibios_init);
 
 /*
- *  Called after each bus is probed, but before its children
- *  are examined.
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
-/*
  * We need to avoid collisions with `mirrored' VGA ports
  * and other strange ISA hardware, so we always want the
  * addresses to be allocated in the 0x000-0x0ff region
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index a162a7f..0167a73 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -467,7 +467,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 	return 0;
 }
 
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 {
         return evt2irq(0xae0);
 }
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 4371f72..98c223e 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -25,6 +25,12 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
 {
 	LIST_HEAD(resources);
 	struct pci_bus *root_bus;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return;
 
 	pci_add_resource_offset(&resources, &info->io_space,
 				info->io_space.start - 0x1000);
@@ -32,15 +38,21 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
 	info->busn.flags = IORESOURCE_BUS;
 	pci_add_resource(&resources, &info->busn);
 
-	root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info,
-				     &resources);
-	if (!root_bus) {
-		pci_free_resource_list(&resources);
+	list_splice_init(&resources, &bridge->windows);
+	bridge->dev.parent = &ofdev->dev;
+	bridge->sysdata = info;
+	bridge->busnr = 0;
+	bridge->ops = info->ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = info->map_irq;
+
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
 		return;
 	}
 
-	/* Setup IRQs of all devices using custom routines */
-	pci_fixup_irqs(pci_common_swizzle, info->map_irq);
+	root_bus = bridge->bus;
 
 	/* Assign devices with resources */
 	pci_assign_unassigned_resources();
@@ -94,9 +106,3 @@ void pcibios_fixup_bus(struct pci_bus *pbus)
 		}
 	}
 }
-
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 7eceaa1..3f8670c 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -690,16 +690,6 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
 	return bus;
 }
 
-void pcibios_fixup_bus(struct pci_bus *pbus)
-{
-}
-
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, oldcmd;
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 732af9a..4a133c0 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -746,12 +746,6 @@ static void watchdog_reset() {
 }
 #endif
 
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
 	return 0;
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index bc6656b..bbf8157 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -67,16 +67,6 @@ static struct pci_ops tile_cfg_ops;
 
 
 /*
- * We don't need to worry about the alignment of resources.
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-			    resource_size_t size, resource_size_t align)
-{
-	return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
-/*
  * Open a FD to the hypervisor PCI device.
  *
  * controller_id is the controller number, config type is 0 or 1 for
@@ -274,6 +264,7 @@ static void fixup_read_and_payload_sizes(void)
  */
 int __init pcibios_init(void)
 {
+	struct pci_host_bridge *bridge;
 	int i;
 
 	pr_info("PCI: Probing PCI hardware\n");
@@ -306,16 +297,26 @@ int __init pcibios_init(void)
 
 			pci_add_resource(&resources, &ioport_resource);
 			pci_add_resource(&resources, &iomem_resource);
-			bus = pci_scan_root_bus(NULL, 0, controller->ops,
-						controller, &resources);
+
+			bridge = pci_alloc_host_bridge(0);
+			if (!bridge)
+				break;
+
+			list_splice_init(&resources, &bridge->windows);
+			bridge->dev.parent = NULL;
+			bridge->sysdata = controller;
+			bridge->busnr = 0;
+			bridge->ops = controller->ops;
+			bridge->swizzle_irq = pci_common_swizzle;
+			bridge->map_irq = tile_map_irq;
+
+			pci_scan_root_bus_bridge(bridge);
+			bus = bridge->bus;
 			controller->root_bus = bus;
 			controller->last_busno = bus->busn_res.end;
 		}
 	}
 
-	/* Do machine dependent PCI interrupt routing */
-	pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
-
 	/*
 	 * This comes from the generic Linux PCI driver.
 	 *
@@ -369,14 +370,6 @@ int __init pcibios_init(void)
 }
 subsys_initcall(pcibios_init);
 
-/*
- * No bus fixups needed.
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-	/* Nothing needs to be done. */
-}
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	/* No special bus mastering setup handling. */
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index b554a68..9aa238a 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -108,15 +108,6 @@ static struct pci_ops tile_cfg_ops;
 /* Mask of CPUs that should receive PCIe interrupts. */
 static struct cpumask intr_cpus_map;
 
-/* We don't need to worry about the alignment of resources. */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-				       resource_size_t size,
-				       resource_size_t align)
-{
-	return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
 /*
  * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #.
  * For now, we simply send interrupts to non-dataplane CPUs.
@@ -669,6 +660,7 @@ int __init pcibios_init(void)
 	resource_size_t offset;
 	LIST_HEAD(resources);
 	int next_busno;
+	struct pci_host_bridge *bridge;
 	int i;
 
 	tile_pci_init();
@@ -881,15 +873,25 @@ int __init pcibios_init(void)
 					controller->mem_offset);
 		pci_add_resource(&resources, &controller->io_space);
 		controller->first_busno = next_busno;
-		bus = pci_scan_root_bus(NULL, next_busno, controller->ops,
-					controller, &resources);
+
+		bridge = pci_alloc_host_bridge(0);
+		if (!bridge)
+			break;
+
+		list_splice_init(&resources, &bridge->windows);
+		bridge->dev.parent = NULL;
+		bridge->sysdata = controller;
+		bridge->busnr = next_busno;
+		bridge->ops = controller->ops;
+		bridge->swizzle_irq = pci_common_swizzle;
+		bridge->map_irq = tile_map_irq;
+
+		pci_scan_root_bus_bridge(bridge);
+		bus = bridge->bus;
 		controller->root_bus = bus;
 		next_busno = bus->busn_res.end + 1;
 	}
 
-	/* Do machine dependent PCI interrupt routing */
-	pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
-
 	/*
 	 * This comes from the generic Linux PCI driver.
 	 *
@@ -1038,11 +1040,6 @@ int __init pcibios_init(void)
 }
 subsys_initcall(pcibios_init);
 
-/* No bus fixups needed. */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
 /* Process any "pci=" kernel boot arguments. */
 char *__init pcibios_setup(char *str)
 {
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 1053bca..9f26840 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -101,7 +101,7 @@ void pci_puv3_preinit(void)
 	writel(readl(PCIBRI_CMD) | PCIBRI_CMD_IO | PCIBRI_CMD_MEM, PCIBRI_CMD);
 }
 
-static int __init pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (dev->bus->number == 0) {
 #ifdef CONFIG_ARCH_FPGA /* 4 pci slots */
@@ -252,19 +252,46 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
 
+static struct resource busn_resource = {
+	.name	= "PCI busn",
+	.start	= 0,
+	.end	= 255,
+	.flags	= IORESOURCE_BUS,
+};
+
 static int __init pci_common_init(void)
 {
 	struct pci_bus *puv3_bus;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return -ENOMEM;
 
 	pci_puv3_preinit();
 
-	puv3_bus = pci_scan_bus(0, &pci_puv3_ops, NULL);
+	pci_add_resource(&bridge->windows, &ioport_resource);
+	pci_add_resource(&bridge->windows, &iomem_resource);
+	pci_add_resource(&bridge->windows, &busn_resource);
+	bridge->sysdata = NULL;
+	bridge->busnr = 0;
+	bridge->ops = &pci_puv3_ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = pci_puv3_map_irq;
+
+	/* Scan our single hose.  */
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return;
+	}
+
+	puv3_bus = bridge->bus;
 
 	if (!puv3_bus)
 		panic("PCI: unable to scan bus!");
 
-	pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
-
 	pci_bus_size_bridges(puv3_bus);
 	pci_bus_assign_resources(puv3_bus);
 	pci_bus_add_devices(puv3_bus);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 42bbbf0..2519c6c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index fde36f1..fa2558e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -219,8 +219,8 @@ struct x86_emulate_ops {
 			 struct x86_instruction_info *info,
 			 enum x86_intercept_stage stage);
 
-	void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
-			  u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+	bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
+			  u32 *ecx, u32 *edx, bool check_limit);
 	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 
 	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 369e41c..8844eee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,15 +79,14 @@
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
-#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
 #define CR3_PCID_INVD		 BIT_64(63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
-			  | X86_CR4_PKE))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
+			  | X86_CR4_SMAP | X86_CR4_PKE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
@@ -204,7 +203,6 @@ enum {
 #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
 
 #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
-				 PFERR_USER_MASK |		\
 				 PFERR_WRITE_MASK |		\
 				 PFERR_PRESENT_MASK)
 
@@ -317,15 +315,17 @@ struct kvm_pio_request {
 	int size;
 };
 
+#define PT64_ROOT_MAX_LEVEL 5
+
 struct rsvd_bits_validate {
-	u64 rsvd_bits_mask[2][4];
+	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
 	u64 bad_mt_xwr;
 };
 
 /*
- * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
- * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
- * mode.
+ * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
+ * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
+ * current mmu mode.
  */
 struct kvm_mmu {
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
@@ -548,8 +548,8 @@ struct kvm_vcpu_arch {
 
 	struct kvm_queued_exception {
 		bool pending;
+		bool injected;
 		bool has_error_code;
-		bool reinject;
 		u8 nr;
 		u32 error_code;
 		u8 nested_apf;
@@ -687,8 +687,12 @@ struct kvm_vcpu_arch {
 	int pending_ioapic_eoi;
 	int pending_external_vector;
 
-	/* GPA available (AMD only) */
+	/* GPA available */
 	bool gpa_available;
+	gpa_t gpa_val;
+
+	/* be preempted when it's in kernel-mode(cpl=0) */
+	bool preempted_in_kernel;
 };
 
 struct kvm_lpage_info {
@@ -979,7 +983,7 @@ struct kvm_x86_ops {
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
-	int (*get_tdp_level)(void);
+	int (*get_tdp_level)(struct kvm_vcpu *vcpu);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
@@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-static inline u64 get_canonical(u64 la)
-{
-	return ((int64_t)la << 16) >> 16;
-}
-
-static inline bool is_noncanonical_address(u64 la)
-{
-#ifdef CONFIG_X86_64
-	return get_canonical(la) != la;
-#else
-	return false;
-#endif
-}
-
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 58fffe7..14835dd 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_IRQ_SHIFT 8
 #define V_IRQ_MASK (1 << V_IRQ_SHIFT)
 
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
 #define V_INTR_PRIO_SHIFT 16
 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
 
@@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_INTR_MASKING_SHIFT 24
 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
 
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 35cd06f..caec841 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,6 +72,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_RDRAND			0x00000800
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC            0x00002000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_RDSEED			0x00010000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
@@ -114,6 +115,10 @@
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
 
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
+#define VMFUNC_EPTP_ENTRIES  512
+
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
 {
 	return vmx_basic & GENMASK_ULL(30, 0);
@@ -187,6 +192,8 @@ enum vmcs_field {
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	POSTED_INTR_DESC_ADDR           = 0x00002016,
 	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
+	VM_FUNCTION_CONTROL             = 0x00002018,
+	VM_FUNCTION_CONTROL_HIGH        = 0x00002019,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER_HIGH                = 0x0000201b,
 	EOI_EXIT_BITMAP0                = 0x0000201c,
@@ -197,6 +204,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+	EPTP_LIST_ADDRESS               = 0x00002024,
+	EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
 	XSS_EXIT_BITMAP                 = 0x0000202C,
@@ -444,6 +453,7 @@ enum vmcs_field {
 
 #define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
 #define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
+#define VMX_EPT_PAGE_WALK_5_BIT			(1ull << 7)
 #define VMX_EPTP_UC_BIT				(1ull << 8)
 #define VMX_EPTP_WB_BIT				(1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
@@ -459,12 +469,14 @@ enum vmcs_field {
 #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT      (1ull << 10) /* (42 - 32) */
 #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT   (1ull << 11) /* (43 - 32) */
 
-#define VMX_EPT_DEFAULT_GAW			3
-#define VMX_EPT_MAX_GAW				0x4
 #define VMX_EPT_MT_EPTE_SHIFT			3
-#define VMX_EPT_GAW_EPTP_SHIFT			3
-#define VMX_EPT_AD_ENABLE_BIT			(1ull << 6)
-#define VMX_EPT_DEFAULT_MT			0x6ull
+#define VMX_EPTP_PWL_MASK			0x38ull
+#define VMX_EPTP_PWL_4				0x18ull
+#define VMX_EPTP_PWL_5				0x20ull
+#define VMX_EPTP_AD_ENABLE_BIT			(1ull << 6)
+#define VMX_EPTP_MT_MASK			0x7ull
+#define VMX_EPTP_MT_WB				0x6ull
+#define VMX_EPTP_MT_UC				0x0ull
 #define VMX_EPT_READABLE_MASK			0x1ull
 #define VMX_EPT_WRITABLE_MASK			0x2ull
 #define VMX_EPT_EXECUTABLE_MASK			0x4ull
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 19adbb4..0099e10 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
 	/*
-	 * The existing code assumes virtual address is 48-bit in the canonical
-	 * address checks; exit if it is ever changed.
+	 * The existing code assumes virtual address is 48-bit or 57-bit in the
+	 * canonical address checks; exit if it is ever changed.
 	 */
 	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-	if (best && ((best->eax & 0xff00) >> 8) != 48 &&
-		((best->eax & 0xff00) >> 8) != 0)
-		return -EINVAL;
+	if (best) {
+		int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+		if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+			return -EINVAL;
+	}
 
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+	kvm_mmu_reset_context(vcpu);
 
 	kvm_pmu_refresh(vcpu);
 	return 0;
@@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.ecx*/
 	const u32 kvm_cpuid_7_0_ecx_x86_features =
-		F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+		F(AVX512VBMI) | F(LA57) | F(PKU) |
+		0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
 	return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
 }
 
-void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
+	       u32 *ecx, u32 *edx, bool check_limit)
 {
 	u32 function = *eax, index = *ecx;
 	struct kvm_cpuid_entry2 *best;
+	bool entry_found = true;
 
 	best = kvm_find_cpuid_entry(vcpu, function, index);
 
-	if (!best)
-		best = check_cpuid_limit(vcpu, function, index);
+	if (!best) {
+		entry_found = false;
+		if (!check_limit)
+			goto out;
 
+		best = check_cpuid_limit(vcpu, function, index);
+	}
+
+out:
 	if (best) {
 		*eax = best->eax;
 		*ebx = best->ebx;
@@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 		*edx = best->edx;
 	} else
 		*eax = *ebx = *ecx = *edx = 0;
-	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
+	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
+	return entry_found;
 }
 EXPORT_SYMBOL_GPL(kvm_cpuid);
 
@@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 
 	eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
+	kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
 	kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
 	kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
 	kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index da67283..1ea3c0e 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,6 +3,7 @@
 
 #include "x86.h"
 #include <asm/cpu.h>
+#include <asm/processor.h>
 
 int kvm_update_cpuid(struct kvm_vcpu *vcpu);
 bool kvm_mpx_supported(void);
@@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 			      struct kvm_cpuid2 *cpuid,
 			      struct kvm_cpuid_entry2 __user *entries);
-void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
+	       u32 *ecx, u32 *edx, bool check_limit);
 
 int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
 
@@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.maxphyaddr;
 }
 
-static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
+struct cpuid_reg {
+	u32 function;
+	u32 index;
+	int reg;
+};
 
-	if (!static_cpu_has(X86_FEATURE_XSAVE))
+static const struct cpuid_reg reverse_cpuid[] = {
+	[CPUID_1_EDX]         = {         1, 0, CPUID_EDX},
+	[CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX},
+	[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
+	[CPUID_1_ECX]         = {         1, 0, CPUID_ECX},
+	[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
+	[CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+	[CPUID_7_0_EBX]       = {         7, 0, CPUID_EBX},
+	[CPUID_D_1_EAX]       = {       0xd, 1, CPUID_EAX},
+	[CPUID_F_0_EDX]       = {       0xf, 0, CPUID_EDX},
+	[CPUID_F_1_EDX]       = {       0xf, 1, CPUID_EDX},
+	[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
+	[CPUID_6_EAX]         = {         6, 0, CPUID_EAX},
+	[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
+	[CPUID_7_ECX]         = {         7, 0, CPUID_ECX},
+	[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
+};
+
+static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
+{
+	unsigned x86_leaf = x86_feature / 32;
+
+	BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
+	BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
+	BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
+
+	return reverse_cpuid[x86_leaf];
+}
+
+static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature)
+{
+	struct kvm_cpuid_entry2 *entry;
+	const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
+
+	entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
+	if (!entry)
+		return NULL;
+
+	switch (cpuid.reg) {
+	case CPUID_EAX:
+		return &entry->eax;
+	case CPUID_EBX:
+		return &entry->ebx;
+	case CPUID_ECX:
+		return &entry->ecx;
+	case CPUID_EDX:
+		return &entry->edx;
+	default:
+		BUILD_BUG();
+		return NULL;
+	}
+}
+
+static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature)
+{
+	int *reg;
+
+	if (x86_feature == X86_FEATURE_XSAVE &&
+			!static_cpu_has(X86_FEATURE_XSAVE))
 		return false;
 
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+	reg = guest_cpuid_get_register(vcpu, x86_feature);
+	if (!reg)
+		return false;
+
+	return *reg & bit(x86_feature);
 }
 
-static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
+static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature)
 {
-	struct kvm_cpuid_entry2 *best;
+	int *reg;
 
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->edx & bit(X86_FEATURE_MTRR));
-}
-
-static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
-}
-
-static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_SMEP));
-}
-
-static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_SMAP));
-}
-
-static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
-}
-
-static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ecx & bit(X86_FEATURE_PKU));
-}
-
-static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-	return best && (best->edx & bit(X86_FEATURE_LM));
-}
-
-static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-	return best && (best->ecx & bit(X86_FEATURE_OSVW));
-}
-
-static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->ecx & bit(X86_FEATURE_PCID));
-}
-
-static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->ecx & bit(X86_FEATURE_X2APIC));
+	reg = guest_cpuid_get_register(vcpu, x86_feature);
+	if (reg)
+		*reg &= ~bit(x86_feature);
 }
 
 static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
@@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
 	return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
 }
 
-static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-	return best && (best->edx & bit(X86_FEATURE_GBPAGES));
-}
-
-static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_RTM));
-}
-
-static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_MPX));
-}
-
-static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-	return best && (best->edx & bit(X86_FEATURE_RDTSCP));
-}
-
-/*
- * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
- */
-#define BIT_NRIPS	3
-
-static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
-
-	/*
-	 * NRIPS is a scattered cpuid feature, so we can't use
-	 * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
-	 * position 8, not 3).
-	 */
-	return best && (best->edx & bit(BIT_NRIPS));
-}
-#undef BIT_NRIPS
-
 static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb00559..16bf665 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -28,6 +28,7 @@
 
 #include "x86.h"
 #include "tss.h"
+#include "mmu.h"
 
 /*
  * Operand types
@@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 	ulong la;
 	u32 lim;
 	u16 sel;
+	u8  va_bits;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	*max_size = 0;
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
 		*linear = la;
-		if (is_noncanonical_address(la))
+		va_bits = ctxt_virt_addr_bits(ctxt);
+		if (get_canonical(la, va_bits) != la)
 			goto bad;
 
-		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
+		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
 		if (size > *max_size)
 			goto bad;
 		break;
@@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				sizeof(base3), &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
-		if (is_noncanonical_address(get_desc_base(&seg_desc) |
-					     ((u64)base3 << 32)))
+		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+				((u64)base3 << 32), ctxt))
 			return emulate_gp(ctxt, 0);
 	}
 load:
@@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
 
 	eax = 0x80000001;
 	ecx = 0;
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
 	return edx & bit(X86_FEATURE_LM);
 }
 
@@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
 	u32 eax, ebx, ecx, edx;
 
 	eax = ecx = 0;
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
 	return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
 		&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
 		&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
@@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 
 	eax = 0x00000000;
 	ecx = 0x00000000;
-	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
 	/*
 	 * Intel ("GenuineIntel")
 	 * remark: Intel CPUs only support "syscall" in 64bit
@@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
-		if (is_noncanonical_address(rcx) ||
-		    is_noncanonical_address(rdx))
+		if (emul_is_noncanonical_address(rcx, ctxt) ||
+		    emul_is_noncanonical_address(rdx, ctxt))
 			return emulate_gp(ctxt, 0);
 		break;
 	}
@@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
 	/*
 	 * Check MOVBE is set in the guest-visible CPUID leaf.
 	 */
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
 	if (!(ecx & FFL(MOVBE)))
 		return emulate_ud(ctxt);
 
@@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
-	    is_noncanonical_address(desc_ptr.address))
+	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
 		return emulate_gp(ctxt, 0);
 	if (lgdt)
 		ctxt->ops->set_gdt(ctxt, &desc_ptr);
@@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 
 	eax = reg_read(ctxt, VCPU_REGS_RAX);
 	ecx = reg_read(ctxt, VCPU_REGS_RCX);
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
 	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
 	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
 	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
@@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
 {
 	u32 eax = 1, ebx, ecx = 0, edx;
 
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
 	if (!(edx & FFL(FXSR)))
 		return emulate_ud(ctxt);
 
@@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 		u64 rsvd = 0;
 
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-		if (efer & EFER_LMA)
-			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
+		if (efer & EFER_LMA) {
+			u64 maxphyaddr;
+			u32 eax = 0x80000008;
+
+			if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
+						 NULL, false))
+				maxphyaddr = eax & 0xff;
+			else
+				maxphyaddr = 36;
+			rsvd = rsvd_bits(maxphyaddr, 62);
+		}
 
 		if (new_val & rsvd)
 			return emulate_gp(ctxt, 0);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 337b6d2..dc97f25 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
 					pdata);
 	}
+	case HV_X64_MSR_TSC_FREQUENCY:
+		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
+		break;
+	case HV_X64_MSR_APIC_FREQUENCY:
+		data = APIC_BUS_FREQUENCY;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
 	switch (code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
-		kvm_vcpu_on_spin(vcpu);
+		kvm_vcpu_on_spin(vcpu, true);
 		break;
 	case HVCALL_POST_MESSAGE:
 	case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index e1e89ee..9add410 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
 #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
 #define KVM_POSSIBLE_CR4_GUEST_BITS				  \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
-	 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
 
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
 					      enum kvm_reg reg)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 589dcc1..aaf10b6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -54,8 +54,6 @@
 #define PRIu64 "u"
 #define PRIo64 "o"
 
-#define APIC_BUS_CYCLE_NS 1
-
 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
 #define apic_debug(fmt, arg...)
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 29caa2c..215721e1 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,6 +12,9 @@
 #define KVM_APIC_SHORT_MASK	0xc0000
 #define KVM_APIC_DEST_MASK	0x800
 
+#define APIC_BUS_CYCLE_NS       1
+#define APIC_BUS_FREQUENCY      (1000000000ULL / APIC_BUS_CYCLE_NS)
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 04d7508..eca30c1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
 }
 
 struct mmu_page_path {
-	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
-	unsigned int idx[PT64_ROOT_LEVEL];
+	struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL];
+	unsigned int idx[PT64_ROOT_MAX_LEVEL];
 };
 
 #define for_each_sp(pvec, sp, parents, i)			\
@@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 	iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
 	iterator->level = vcpu->arch.mmu.shadow_root_level;
 
-	if (iterator->level == PT64_ROOT_LEVEL &&
-	    vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL &&
+	if (iterator->level == PT64_ROOT_4LEVEL &&
+	    vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
 	    !vcpu->arch.mmu.direct_map)
 		--iterator->level;
 
@@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
 
 	sp = list_last_entry(&kvm->arch.active_mmu_pages,
 			     struct kvm_mmu_page, link);
-	kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
-
-	return true;
+	return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
 }
 
 /*
@@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			 gfn_t gfn, bool prefault)
@@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	make_mmu_pages_available(vcpu);
+	if (make_mmu_pages_available(vcpu) < 0)
+		goto out_unlock;
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
@@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
-	    (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
+	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
+	    (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
 	     vcpu->arch.mmu.direct_map)) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
-		make_mmu_pages_available(vcpu);
-		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
+		if(make_mmu_pages_available(vcpu) < 0) {
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			return 1;
+		}
+		sp = kvm_mmu_get_page(vcpu, 0, 0,
+				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
 			MMU_WARN_ON(VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
-			make_mmu_pages_available(vcpu);
+			if (make_mmu_pages_available(vcpu) < 0) {
+				spin_unlock(&vcpu->kvm->mmu_lock);
+				return 1;
+			}
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 					i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
 			root = __pa(sp->spt);
@@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * Do we shadow a long mode page table? If so we need to
 	 * write-protect the guests page table root.
 	 */
-	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 		MMU_WARN_ON(VALID_PAGE(root));
 
 		spin_lock(&vcpu->kvm->mmu_lock);
-		make_mmu_pages_available(vcpu);
-		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
-				      0, ACC_ALL);
+		if (make_mmu_pages_available(vcpu) < 0) {
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			return 1;
+		}
+		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3445,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * the shadow page table may be a PAE or a long mode page table.
 	 */
 	pm_mask = PT_PRESENT_MASK;
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL)
+	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
 	for (i = 0; i < 4; ++i) {
@@ -3463,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 				return 1;
 		}
 		spin_lock(&vcpu->kvm->mmu_lock);
-		make_mmu_pages_available(vcpu);
+		if (make_mmu_pages_available(vcpu) < 0) {
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			return 1;
+		}
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
 				      0, ACC_ALL);
 		root = __pa(sp->spt);
@@ -3478,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * If we shadow a 32 bit page table with a long mode page
 	 * table we enter this path.
 	 */
-	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
 		if (vcpu->arch.mmu.lm_root == NULL) {
 			/*
 			 * The additional page necessary for this is only
@@ -3523,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
-	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 		sp = page_header(root);
 		mmu_sync_children(vcpu, sp);
@@ -3588,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 
 static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
+	/*
+	 * A nested guest cannot use the MMIO cache if it is using nested
+	 * page tables, because cr2 is a nGPA while the cache stores GPAs.
+	 */
+	if (mmu_is_nested(vcpu))
+		return false;
+
 	if (direct)
 		return vcpu_match_mmio_gpa(vcpu, addr);
 
@@ -3599,7 +3618,7 @@ static bool
 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
 	int root, leaf;
 	bool reserved = false;
 
@@ -3640,7 +3659,23 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	return reserved;
 }
 
-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+/*
+ * Return values of handle_mmio_page_fault:
+ * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+ *			directly.
+ * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
+ *			fault path update the mmio spte.
+ * RET_MMIO_PF_RETRY: let CPU fault again on the address.
+ * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
+ */
+enum {
+	RET_MMIO_PF_EMULATE = 1,
+	RET_MMIO_PF_INVALID = 2,
+	RET_MMIO_PF_RETRY = 0,
+	RET_MMIO_PF_BUG = -1
+};
+
+static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
 	bool reserved;
@@ -3872,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	make_mmu_pages_available(vcpu);
+	if (make_mmu_pages_available(vcpu) < 0)
+		goto out_unlock;
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
@@ -4025,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 		rsvd_check->rsvd_bits_mask[1][0] =
 			rsvd_check->rsvd_bits_mask[0][0];
 		break;
-	case PT64_ROOT_LEVEL:
+	case PT64_ROOT_5LEVEL:
+		rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
+			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+			rsvd_bits(maxphyaddr, 51);
+		rsvd_check->rsvd_bits_mask[1][4] =
+			rsvd_check->rsvd_bits_mask[0][4];
+	case PT64_ROOT_4LEVEL:
 		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
 			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
 			rsvd_bits(maxphyaddr, 51);
@@ -4055,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 {
 	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
 				cpuid_maxphyaddr(vcpu), context->root_level,
-				context->nx, guest_cpuid_has_gbpages(vcpu),
+				context->nx,
+				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
 				is_pse(vcpu), guest_cpuid_is_amd(vcpu));
 }
 
@@ -4065,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 {
 	u64 bad_mt_xwr;
 
+	rsvd_check->rsvd_bits_mask[0][4] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
 	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
 	rsvd_check->rsvd_bits_mask[0][2] =
@@ -4074,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
 	/* large page */
+	rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
 	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
 	rsvd_check->rsvd_bits_mask[1][2] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
@@ -4120,8 +4166,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
 				context->shadow_root_level, uses_nx,
-				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
-				true);
+				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
+				is_pse(vcpu), true);
 
 	if (!shadow_me_mask)
 		return;
@@ -4185,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 				    boot_cpu_data.x86_phys_bits, execonly);
 }
 
+#define BYTE_MASK(access) \
+	((1 & (access) ? 2 : 0) | \
+	 (2 & (access) ? 4 : 0) | \
+	 (3 & (access) ? 8 : 0) | \
+	 (4 & (access) ? 16 : 0) | \
+	 (5 & (access) ? 32 : 0) | \
+	 (6 & (access) ? 64 : 0) | \
+	 (7 & (access) ? 128 : 0))
+
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				      struct kvm_mmu *mmu, bool ept)
 {
-	unsigned bit, byte, pfec;
-	u8 map;
-	bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
+	unsigned byte;
 
-	cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
-	cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+	const u8 x = BYTE_MASK(ACC_EXEC_MASK);
+	const u8 w = BYTE_MASK(ACC_WRITE_MASK);
+	const u8 u = BYTE_MASK(ACC_USER_MASK);
+
+	bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
+	bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
+	bool cr0_wp = is_write_protection(vcpu);
+
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
-		pfec = byte << 1;
-		map = 0;
-		wf = pfec & PFERR_WRITE_MASK;
-		uf = pfec & PFERR_USER_MASK;
-		ff = pfec & PFERR_FETCH_MASK;
+		unsigned pfec = byte << 1;
+
 		/*
-		 * PFERR_RSVD_MASK bit is set in PFEC if the access is not
-		 * subject to SMAP restrictions, and cleared otherwise. The
-		 * bit is only meaningful if the SMAP bit is set in CR4.
+		 * Each "*f" variable has a 1 bit for each UWX value
+		 * that causes a fault with the given PFEC.
 		 */
-		smapf = !(pfec & PFERR_RSVD_MASK);
-		for (bit = 0; bit < 8; ++bit) {
-			x = bit & ACC_EXEC_MASK;
-			w = bit & ACC_WRITE_MASK;
-			u = bit & ACC_USER_MASK;
 
-			if (!ept) {
-				/* Not really needed: !nx will cause pte.nx to fault */
-				x |= !mmu->nx;
-				/* Allow supervisor writes if !cr0.wp */
-				w |= !is_write_protection(vcpu) && !uf;
-				/* Disallow supervisor fetches of user code if cr4.smep */
-				x &= !(cr4_smep && u && !uf);
+		/* Faults from writes to non-writable pages */
+		u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+		/* Faults from user mode accesses to supervisor pages */
+		u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+		/* Faults from fetches of non-executable pages*/
+		u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+		/* Faults from kernel mode fetches of user pages */
+		u8 smepf = 0;
+		/* Faults from kernel mode accesses of user pages */
+		u8 smapf = 0;
 
-				/*
-				 * SMAP:kernel-mode data accesses from user-mode
-				 * mappings should fault. A fault is considered
-				 * as a SMAP violation if all of the following
-				 * conditions are ture:
-				 *   - X86_CR4_SMAP is set in CR4
-				 *   - A user page is accessed
-				 *   - Page fault in kernel mode
-				 *   - if CPL = 3 or X86_EFLAGS_AC is clear
-				 *
-				 *   Here, we cover the first three conditions.
-				 *   The fourth is computed dynamically in
-				 *   permission_fault() and is in smapf.
-				 *
-				 *   Also, SMAP does not affect instruction
-				 *   fetches, add the !ff check here to make it
-				 *   clearer.
-				 */
-				smap = cr4_smap && u && !uf && !ff;
-			}
+		if (!ept) {
+			/* Faults from kernel mode accesses to user pages */
+			u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
 
-			fault = (ff && !x) || (uf && !u) || (wf && !w) ||
-				(smapf && smap);
-			map |= fault << bit;
+			/* Not really needed: !nx will cause pte.nx to fault */
+			if (!mmu->nx)
+				ff = 0;
+
+			/* Allow supervisor writes if !cr0.wp */
+			if (!cr0_wp)
+				wf = (pfec & PFERR_USER_MASK) ? wf : 0;
+
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			if (cr4_smep)
+				smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0;
+
+			/*
+			 * SMAP:kernel-mode data accesses from user-mode
+			 * mappings should fault. A fault is considered
+			 * as a SMAP violation if all of the following
+			 * conditions are ture:
+			 *   - X86_CR4_SMAP is set in CR4
+			 *   - A user page is accessed
+			 *   - The access is not a fetch
+			 *   - Page fault in kernel mode
+			 *   - if CPL = 3 or X86_EFLAGS_AC is clear
+			 *
+			 * Here, we cover the first three conditions.
+			 * The fourth is computed dynamically in permission_fault();
+			 * PFERR_RSVD_MASK bit will be set in PFEC if the access is
+			 * *not* subject to SMAP restrictions.
+			 */
+			if (cr4_smap)
+				smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
 		}
-		mmu->permissions[byte] = map;
+
+		mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
 	}
 }
 
@@ -4358,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 static void paging64_init_context(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu *context)
 {
-	paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+	int root_level = is_la57_mode(vcpu) ?
+			 PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
+
+	paging64_init_context_common(vcpu, context, root_level);
 }
 
 static void paging32_init_context(struct kvm_vcpu *vcpu,
@@ -4399,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
-	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
+	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
@@ -4413,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
 		context->nx = is_nx(vcpu);
-		context->root_level = PT64_ROOT_LEVEL;
+		context->root_level = is_la57_mode(vcpu) ?
+				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	} else if (is_pae(vcpu)) {
@@ -4470,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 
 	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
 
-	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
+	context->shadow_root_level = PT64_ROOT_4LEVEL;
 
 	context->nx = true;
 	context->ept_ad = accessed_dirty;
@@ -4479,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->sync_page = ept_sync_page;
 	context->invlpg = ept_invlpg;
 	context->update_pte = ept_update_pte;
-	context->root_level = context->shadow_root_level;
+	context->root_level = PT64_ROOT_4LEVEL;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 	context->base_role.ad_disabled = !accessed_dirty;
@@ -4524,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
 	} else if (is_long_mode(vcpu)) {
 		g_context->nx = is_nx(vcpu);
-		g_context->root_level = PT64_ROOT_LEVEL;
+		g_context->root_level = is_la57_mode(vcpu) ?
+					PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
 	} else if (is_pae(vcpu)) {
@@ -4814,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 {
 	LIST_HEAD(invalid_list);
 
 	if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
-		return;
+		return 0;
 
 	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
 		if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
@@ -4828,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
 		++vcpu->kvm->stat.mmu_recycled;
 	}
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
+	if (!kvm_mmu_available_pages(vcpu->kvm))
+		return -ENOSPC;
+	return 0;
 }
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
@@ -4835,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 {
 	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
-	bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
+	bool direct = vcpu->arch.mmu.direct_map;
+
+	/* With shadow page tables, fault_address contains a GVA or nGPA.  */
+	if (vcpu->arch.mmu.direct_map) {
+		vcpu->arch.gpa_available = true;
+		vcpu->arch.gpa_val = cr2;
+	}
 
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 		r = handle_mmio_page_fault(vcpu, cr2, direct);
@@ -4847,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 			return 1;
 		if (r < 0)
 			return r;
+		/* Must be RET_MMIO_PF_INVALID.  */
 	}
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
@@ -4862,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 * This can occur when using nested virtualization with nested
 	 * paging in both guests. If true, we simply unprotect the page
 	 * and resume the guest.
-	 *
-	 * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
-	 *       in PFERR_NEXT_GUEST_PAGE)
 	 */
-	if (error_code == PFERR_NESTED_GUEST_PAGE) {
+	if (vcpu->arch.mmu.direct_map &&
+	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
 		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
 		return 1;
 	}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 4b9a3ae..64a2dbd 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -37,7 +37,8 @@
 #define PT32_DIR_PSE36_MASK \
 	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
 
-#define PT64_ROOT_LEVEL 4
+#define PT64_ROOT_5LEVEL 5
+#define PT64_ROOT_4LEVEL 4
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
@@ -48,6 +49,9 @@
 
 static inline u64 rsvd_bits(int s, int e)
 {
+	if (e < s)
+		return 0;
+
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
@@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 
-/*
- * Return values of handle_mmio_page_fault:
- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
- *			directly.
- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
- *			fault path update the mmio spte.
- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
- */
-enum {
-	RET_MMIO_PF_EMULATE = 1,
-	RET_MMIO_PF_INVALID = 2,
-	RET_MMIO_PF_RETRY = 0,
-	RET_MMIO_PF_BUG = -1
-};
-
-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty);
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index dcce533..d22ddbd 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 
-	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 		sp = page_header(root);
-		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
+		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
 		return;
 	}
 
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 0149ac5..e9ea2d4 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
 	 * enable MTRRs and it is obviously undesirable to run the
 	 * guest entirely with UC memory and we use WB.
 	 */
-	if (guest_cpuid_has_mtrr(vcpu))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
 		return MTRR_TYPE_UNCACHABLE;
 	else
 		return MTRR_TYPE_WRBACK;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b0454c7..86b68dc 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 			 &map_writable))
 		return 0;
 
-	if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
-				walker.gfn, pfn, walker.pte_access, &r))
+	if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
 		return r;
 
 	/*
@@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 		goto out_unlock;
 
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-	make_mmu_pages_available(vcpu);
+	if (make_mmu_pages_available(vcpu) < 0)
+		goto out_unlock;
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8dbd8db..2c1cfe6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO);
 static int vls = true;
 module_param(vls, int, 0444);
 
-/* AVIC VM ID bit masks and lock */
-static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
-static DEFINE_SPINLOCK(avic_vm_id_lock);
+/* enable/disable Virtual GIF */
+static int vgif = true;
+module_param(vgif, int, 0444);
 
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
 	recalc_intercepts(svm);
 }
 
+static inline bool vgif_enabled(struct vcpu_svm *svm)
+{
+	return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
+}
+
 static inline void enable_gif(struct vcpu_svm *svm)
 {
-	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+	if (vgif_enabled(svm))
+		svm->vmcb->control.int_ctl |= V_GIF_MASK;
+	else
+		svm->vcpu.arch.hflags |= HF_GIF_MASK;
 }
 
 static inline void disable_gif(struct vcpu_svm *svm)
 {
-	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+	if (vgif_enabled(svm))
+		svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+	else
+		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
 }
 
 static inline bool gif_set(struct vcpu_svm *svm)
 {
-	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
+	if (vgif_enabled(svm))
+		return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+	else
+		return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static unsigned long iopm_base;
@@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid)
 	asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
 }
 
-static int get_npt_level(void)
+static int get_npt_level(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
-	return PT64_ROOT_LEVEL;
+	return PT64_ROOT_4LEVEL;
 #else
 	return PT32E_ROOT_LEVEL;
 #endif
@@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
 	bool has_error_code = vcpu->arch.exception.has_error_code;
-	bool reinject = vcpu->arch.exception.reinject;
+	bool reinject = vcpu->arch.exception.injected;
 	u32 error_code = vcpu->arch.exception.error_code;
 
 	/*
@@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 static void disable_nmi_singlestep(struct vcpu_svm *svm)
 {
 	svm->nmi_singlestep = false;
+
 	if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
 		/* Clear our flags if they were not set by the guest */
 		if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
@@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
  */
 #define SVM_VM_DATA_HASH_BITS	8
 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+static u32 next_vm_id = 0;
+static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 
 /* Note:
@@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void)
 		}
 	}
 
+	if (vgif) {
+		if (!boot_cpu_has(X86_FEATURE_VGIF))
+			vgif = false;
+		else
+			pr_info("Virtual GIF supported\n");
+	}
+
 	return 0;
 
 err:
@@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm)
 		svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 	}
 
+	if (vgif) {
+		clr_intercept(svm, INTERCEPT_STGI);
+		clr_intercept(svm, INTERCEPT_CLGI);
+		svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
+	}
+
 	mark_all_dirty(svm->vmcb);
 
 	enable_gif(svm);
@@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static inline int avic_get_next_vm_id(void)
-{
-	int id;
-
-	spin_lock(&avic_vm_id_lock);
-
-	/* AVIC VM ID is one-based. */
-	id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
-	if (id <= AVIC_VM_ID_MASK)
-		__set_bit(id, avic_vm_id_bitmap);
-	else
-		id = -EAGAIN;
-
-	spin_unlock(&avic_vm_id_lock);
-	return id;
-}
-
-static inline int avic_free_vm_id(int id)
-{
-	if (id <= 0 || id > AVIC_VM_ID_MASK)
-		return -EINVAL;
-
-	spin_lock(&avic_vm_id_lock);
-	__clear_bit(id, avic_vm_id_bitmap);
-	spin_unlock(&avic_vm_id_lock);
-	return 0;
-}
-
 static void avic_vm_destroy(struct kvm *kvm)
 {
 	unsigned long flags;
@@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm)
 	if (!avic)
 		return;
 
-	avic_free_vm_id(vm_data->avic_vm_id);
-
 	if (vm_data->avic_logical_id_table_page)
 		__free_page(vm_data->avic_logical_id_table_page);
 	if (vm_data->avic_physical_id_table_page)
@@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm)
 static int avic_vm_init(struct kvm *kvm)
 {
 	unsigned long flags;
-	int vm_id, err = -ENOMEM;
+	int err = -ENOMEM;
 	struct kvm_arch *vm_data = &kvm->arch;
 	struct page *p_page;
 	struct page *l_page;
+	struct kvm_arch *ka;
+	u32 vm_id;
 
 	if (!avic)
 		return 0;
 
-	vm_id = avic_get_next_vm_id();
-	if (vm_id < 0)
-		return vm_id;
-	vm_data->avic_vm_id = (u32)vm_id;
-
 	/* Allocating physical APIC ID table (4KB) */
 	p_page = alloc_page(GFP_KERNEL);
 	if (!p_page)
@@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm)
 	clear_page(page_address(l_page));
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ again:
+	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
+	if (vm_id == 0) { /* id is 1-based, zero is not okay */
+		next_vm_id_wrapped = 1;
+		goto again;
+	}
+	/* Is it still in use? Only possible if wrapped at least once */
+	if (next_vm_id_wrapped) {
+		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+			struct kvm *k2 = container_of(ka, struct kvm, arch);
+			struct kvm_arch *vd2 = &k2->arch;
+			if (vd2->avic_vm_id == vm_id)
+				goto again;
+		}
+	}
+	vm_data->avic_vm_id = vm_id;
 	hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 
@@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	}
 	init_vmcb(svm);
 
-	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
 
 	if (kvm_vcpu_apicv_active(vcpu) && !init_event)
@@ -2384,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
-	vcpu->arch.mmu.shadow_root_level = get_npt_level();
+	vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
 	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
@@ -3147,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm)
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
+	/*
+	 * If VGIF is enabled, the STGI intercept is only added to
+	 * detect the opening of the NMI window; remove it now.
+	 */
+	if (vgif_enabled(svm))
+		clr_intercept(svm, INTERCEPT_STGI);
+
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@@ -3744,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 
 static int pause_interception(struct vcpu_svm *svm)
 {
-	kvm_vcpu_on_spin(&(svm->vcpu));
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	bool in_kernel = (svm_get_cpl(vcpu) == 0);
+
+	kvm_vcpu_on_spin(vcpu, in_kernel);
 	return 1;
 }
 
@@ -4228,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-	vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
-
 	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
 	if (npt_enabled)
@@ -4682,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
 	 * 1, because that's a separate STGI/VMRUN intercept.  The next time we
 	 * get that intercept, this function will be called again though and
-	 * we'll get the vintr intercept.
+	 * we'll get the vintr intercept. However, if the vGIF feature is
+	 * enabled, the STGI interception will not occur. Enable the irq
+	 * window under the assumption that the hardware will set the GIF.
 	 */
-	if (gif_set(svm) && nested_svm_intr(svm)) {
+	if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 	}
@@ -4698,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	    == HF_NMI_MASK)
 		return; /* IRET will cause a vm exit */
 
-	if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+	if (!gif_set(svm)) {
+		if (vgif_enabled(svm))
+			set_intercept(svm, INTERCEPT_STGI);
 		return; /* STGI will cause a vm exit */
+	}
 
 	if (svm->nested.exit_required)
 		return; /* we're not going to run the guest yet */
@@ -5071,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct kvm_cpuid_entry2 *entry;
 
 	/* Update nrips enabled cache */
-	svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
+	svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
 
 	if (!kvm_vcpu_apicv_active(vcpu))
 		return;
 
-	entry = kvm_find_cpuid_entry(vcpu, 1, 0);
-	if (entry)
-		entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+	guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
 }
 
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0a6cc67..8a202c4 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio,
  */
 TRACE_EVENT(kvm_cpuid,
 	TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
-		 unsigned long rcx, unsigned long rdx),
-	TP_ARGS(function, rax, rbx, rcx, rdx),
+		 unsigned long rcx, unsigned long rdx, bool found),
+	TP_ARGS(function, rax, rbx, rcx, rdx, found),
 
 	TP_STRUCT__entry(
 		__field(	unsigned int,	function	)
@@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid,
 		__field(	unsigned long,	rbx		)
 		__field(	unsigned long,	rcx		)
 		__field(	unsigned long,	rdx		)
+		__field(	bool,		found		)
 	),
 
 	TP_fast_assign(
@@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid,
 		__entry->rbx		= rbx;
 		__entry->rcx		= rcx;
 		__entry->rdx		= rdx;
+		__entry->found		= found;
 	),
 
-	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
+	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s",
 		  __entry->function, __entry->rax,
-		  __entry->rbx, __entry->rcx, __entry->rdx)
+		  __entry->rbx, __entry->rcx, __entry->rdx,
+		  __entry->found ? "found" : "not found")
 );
 
 #define AREG(x) { APIC_##x, "APIC_" #x }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 70b90c0..4253ade 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -243,11 +243,13 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 posted_intr_desc_addr;
+	u64 vm_function_control;
 	u64 ept_pointer;
 	u64 eoi_exit_bitmap0;
 	u64 eoi_exit_bitmap1;
 	u64 eoi_exit_bitmap2;
 	u64 eoi_exit_bitmap3;
+	u64 eptp_list_address;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
@@ -481,6 +483,7 @@ struct nested_vmx {
 	u64 nested_vmx_cr4_fixed0;
 	u64 nested_vmx_cr4_fixed1;
 	u64 nested_vmx_vmcs_enum;
+	u64 nested_vmx_vmfunc_controls;
 };
 
 #define POSTED_INTR_ON  0
@@ -573,6 +576,8 @@ struct vcpu_vmx {
 #endif
 	u32 vm_entry_controls_shadow;
 	u32 vm_exit_controls_shadow;
+	u32 secondary_exec_control;
+
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
 	 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -761,11 +766,13 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
+	FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
 	FIELD64(EPT_POINTER, ept_pointer),
 	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
 	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
 	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
 	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -889,25 +896,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 	return to_vmx(vcpu)->nested.cached_vmcs12;
 }
 
-static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
-{
-	struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
-	if (is_error_page(page))
-		return NULL;
-
-	return page;
-}
-
-static void nested_release_page(struct page *page)
-{
-	kvm_release_page_dirty(page);
-}
-
-static void nested_release_page_clean(struct page *page)
-{
-	kvm_release_page_clean(page);
-}
-
 static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@ -1212,6 +1200,16 @@ static inline bool cpu_has_vmx_ept_4levels(void)
 	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
 }
 
+static inline bool cpu_has_vmx_ept_mt_wb(void)
+{
+	return vmx_capability.ept & VMX_EPTP_WB_BIT;
+}
+
+static inline bool cpu_has_vmx_ept_5levels(void)
+{
+	return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
+}
+
 static inline bool cpu_has_vmx_ept_ad_bits(void)
 {
 	return vmx_capability.ept & VMX_EPT_AD_BIT;
@@ -1317,6 +1315,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
 		SECONDARY_EXEC_TSC_SCALING;
 }
 
+static inline bool cpu_has_vmx_vmfunc(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_VMFUNC;
+}
+
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -1357,8 +1361,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 
 static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 {
-	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
-		vmx_xsaves_supported();
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 }
 
 static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
@@ -1391,6 +1394,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
 }
 
+static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
+}
+
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has_vmfunc(vmcs12) &&
+		(vmcs12->vm_function_control &
+		 VMX_VMFUNC_EPTP_SWITCHING);
+}
+
 static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2450,15 +2465,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	unsigned int nr = vcpu->arch.exception.nr;
 
 	if (nr == PF_VECTOR) {
 		if (vcpu->arch.exception.nested_apf) {
-			nested_vmx_inject_exception_vmexit(vcpu,
-							   vcpu->arch.apf.nested_apf_token);
+			*exit_qual = vcpu->arch.apf.nested_apf_token;
 			return 1;
 		}
 		/*
@@ -2472,16 +2486,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 		 */
 		if (nested_vmx_is_page_fault_vmexit(vmcs12,
 						    vcpu->arch.exception.error_code)) {
-			nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
+			*exit_qual = vcpu->arch.cr2;
 			return 1;
 		}
 	} else {
-		unsigned long exit_qual = 0;
-		if (nr == DB_VECTOR)
-			exit_qual = vcpu->arch.dr6;
-
 		if (vmcs12->exception_bitmap & (1u << nr)) {
-			nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+			if (nr == DB_VECTOR)
+				*exit_qual = vcpu->arch.dr6;
+			else
+				*exit_qual = 0;
 			return 1;
 		}
 	}
@@ -2494,14 +2507,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned nr = vcpu->arch.exception.nr;
 	bool has_error_code = vcpu->arch.exception.has_error_code;
-	bool reinject = vcpu->arch.exception.reinject;
 	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu))
-		return;
-
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
@@ -2600,7 +2608,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		if (index >= 0)
 			move_msr_up(vmx, index, save_nmsrs++);
 		index = __find_msr_index(vmx, MSR_TSC_AUX);
-		if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu))
+		if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
 			move_msr_up(vmx, index, save_nmsrs++);
 		/*
 		 * MSR_STAR is only needed on long mode guests, and only
@@ -2660,12 +2668,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	}
 }
 
-static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
-	return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31)));
-}
-
 /*
  * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
  * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
@@ -2674,7 +2676,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
  */
 static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
 {
-	return nested && guest_cpuid_has_vmx(vcpu);
+	return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
 }
 
 /*
@@ -2797,21 +2799,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	vmx->nested.nested_vmx_procbased_ctls_low &=
 		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
-	/* secondary cpu-based controls */
+	/*
+	 * secondary cpu-based controls.  Do not include those that
+	 * depend on CPUID bits, they are added later by vmx_cpuid_update.
+	 */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
 		vmx->nested.nested_vmx_secondary_ctls_low,
 		vmx->nested.nested_vmx_secondary_ctls_high);
 	vmx->nested.nested_vmx_secondary_ctls_low = 0;
 	vmx->nested.nested_vmx_secondary_ctls_high &=
-		SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_DESC |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-		SECONDARY_EXEC_WBINVD_EXITING |
-		SECONDARY_EXEC_XSAVES;
+		SECONDARY_EXEC_WBINVD_EXITING;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
@@ -2834,6 +2836,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
+	if (cpu_has_vmx_vmfunc()) {
+		vmx->nested.nested_vmx_secondary_ctls_high |=
+			SECONDARY_EXEC_ENABLE_VMFUNC;
+		/*
+		 * Advertise EPTP switching unconditionally
+		 * since we emulate it
+		 */
+		vmx->nested.nested_vmx_vmfunc_controls =
+			VMX_VMFUNC_EPTP_SWITCHING;
+	}
+
 	/*
 	 * Old versions of KVM use the single-context version without
 	 * checking for support, so declare that it is supported even
@@ -3203,6 +3216,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
+	case MSR_IA32_VMX_VMFUNC:
+		*pdata = vmx->nested.nested_vmx_vmfunc_controls;
+		break;
 	default:
 		return 1;
 	}
@@ -3256,7 +3272,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_BNDCFGS:
 		if (!kvm_mpx_supported() ||
-		    (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
 			return 1;
 		msr_info->data = vmcs_read64(GUEST_BNDCFGS);
 		break;
@@ -3280,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.ia32_xss;
 		break;
 	case MSR_TSC_AUX:
-		if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
 		/* Otherwise falls through */
 	default:
@@ -3339,9 +3357,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_BNDCFGS:
 		if (!kvm_mpx_supported() ||
-		    (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
 			return 1;
-		if (is_noncanonical_address(data & PAGE_MASK) ||
+		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
 		    (data & MSR_IA32_BNDCFGS_RSVD))
 			return 1;
 		vmcs_write64(GUEST_BNDCFGS, data);
@@ -3402,7 +3421,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
 	case MSR_TSC_AUX:
-		if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
@@ -3639,8 +3659,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_XSAVES |
+			SECONDARY_EXEC_RDSEED |
+			SECONDARY_EXEC_RDRAND |
 			SECONDARY_EXEC_ENABLE_PML |
-			SECONDARY_EXEC_TSC_SCALING;
+			SECONDARY_EXEC_TSC_SCALING |
+			SECONDARY_EXEC_ENABLE_VMFUNC;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -4272,16 +4295,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmx->emulation_required = emulation_required(vcpu);
 }
 
+static int get_ept_level(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
+		return 5;
+	return 4;
+}
+
 static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
 {
-	u64 eptp;
+	u64 eptp = VMX_EPTP_MT_WB;
 
-	/* TODO write the value reading from MSR */
-	eptp = VMX_EPT_DEFAULT_MT |
-		VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+	eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
+
 	if (enable_ept_ad_bits &&
 	    (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
-		eptp |= VMX_EPT_AD_ENABLE_BIT;
+		eptp |= VMX_EPTP_AD_ENABLE_BIT;
 	eptp |= (root_hpa & PAGE_MASK);
 
 	return eptp;
@@ -5243,10 +5272,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
-static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
+static bool vmx_rdrand_supported(void)
 {
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_RDRAND;
+}
+
+static bool vmx_rdseed_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_RDSEED;
+}
+
+static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
+{
+	struct kvm_vcpu *vcpu = &vmx->vcpu;
+
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
-	if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu))
+	if (!cpu_need_virtualize_apic_accesses(vcpu))
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 	if (vmx->vpid == 0)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
@@ -5260,7 +5303,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
+	if (!kvm_vcpu_apicv_active(vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -5274,7 +5317,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!enable_pml)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-	return exec_control;
+	if (vmx_xsaves_supported()) {
+		/* Exposing XSAVES only when XSAVE is exposed */
+		bool xsaves_enabled =
+			guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
+
+		if (!xsaves_enabled)
+			exec_control &= ~SECONDARY_EXEC_XSAVES;
+
+		if (nested) {
+			if (xsaves_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_XSAVES;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_XSAVES;
+		}
+	}
+
+	if (vmx_rdtscp_supported()) {
+		bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
+		if (!rdtscp_enabled)
+			exec_control &= ~SECONDARY_EXEC_RDTSCP;
+
+		if (nested) {
+			if (rdtscp_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_RDTSCP;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_RDTSCP;
+		}
+	}
+
+	if (vmx_invpcid_supported()) {
+		/* Exposing INVPCID only when PCID is exposed */
+		bool invpcid_enabled =
+			guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
+			guest_cpuid_has(vcpu, X86_FEATURE_PCID);
+
+		if (!invpcid_enabled) {
+			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+			guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
+		}
+
+		if (nested) {
+			if (invpcid_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_ENABLE_INVPCID;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_ENABLE_INVPCID;
+		}
+	}
+
+	if (vmx_rdrand_supported()) {
+		bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
+		if (rdrand_enabled)
+			exec_control &= ~SECONDARY_EXEC_RDRAND;
+
+		if (nested) {
+			if (rdrand_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_RDRAND;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_RDRAND;
+		}
+	}
+
+	if (vmx_rdseed_supported()) {
+		bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
+		if (rdseed_enabled)
+			exec_control &= ~SECONDARY_EXEC_RDSEED;
+
+		if (nested) {
+			if (rdseed_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_RDSEED;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_RDSEED;
+		}
+	}
+
+	vmx->secondary_exec_control = exec_control;
 }
 
 static void ept_set_mmio_spte_mask(void)
@@ -5318,8 +5446,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
 	if (cpu_has_secondary_exec_ctrls()) {
+		vmx_compute_secondary_exec_control(vmx);
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-				vmx_secondary_exec_control(vmx));
+			     vmx->secondary_exec_control);
 	}
 
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@@ -5357,6 +5486,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
 #endif
 
+	if (cpu_has_vmx_vmfunc())
+		vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
@@ -5835,6 +5967,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 static int handle_triple_fault(struct kvm_vcpu *vcpu)
 {
 	vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+	vcpu->mmio_needed = 0;
 	return 0;
 }
 
@@ -6330,7 +6463,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
 	gpa_t gpa;
-	u32 error_code;
+	u64 error_code;
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
@@ -6362,9 +6495,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 			EPT_VIOLATION_EXECUTABLE))
 		      ? PFERR_PRESENT_MASK : 0;
 
-	vcpu->arch.gpa_available = true;
-	vcpu->arch.exit_qualification = exit_qualification;
+	error_code |= (exit_qualification & 0x100) != 0 ?
+	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
+	vcpu->arch.exit_qualification = exit_qualification;
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -6373,23 +6507,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	int ret;
 	gpa_t gpa;
 
+	/*
+	 * A nested guest cannot optimize MMIO vmexits, because we have an
+	 * nGPA here instead of the required GPA.
+	 */
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+	if (!is_guest_mode(vcpu) &&
+	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		trace_kvm_fast_mmio(gpa);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	ret = handle_mmio_page_fault(vcpu, gpa, true);
-	vcpu->arch.gpa_available = true;
-	if (likely(ret == RET_MMIO_PF_EMULATE))
-		return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
-					      EMULATE_DONE;
-
-	if (unlikely(ret == RET_MMIO_PF_INVALID))
-		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
-
-	if (unlikely(ret == RET_MMIO_PF_RETRY))
-		return 1;
+	ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
+	if (ret >= 0)
+		return ret;
 
 	/* It is the real ept misconfig */
 	WARN_ON(1);
@@ -6611,7 +6742,8 @@ static __init int hardware_setup(void)
 		init_vmcs_shadow_fields();
 
 	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
+	    !cpu_has_vmx_ept_4levels() ||
+	    !cpu_has_vmx_ept_mt_wb()) {
 		enable_ept = 0;
 		enable_unrestricted_guest = 0;
 		enable_ept_ad_bits = 0;
@@ -6754,7 +6886,13 @@ static int handle_pause(struct kvm_vcpu *vcpu)
 	if (ple_gap)
 		grow_ple_window(vcpu);
 
-	kvm_vcpu_on_spin(vcpu);
+	/*
+	 * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
+	 * VM-execution control is ignored if CPL > 0. OTOH, KVM
+	 * never set PAUSE_EXITING and just set PLE if supported,
+	 * so the vcpu must be CPL=0 if it gets a PAUSE exit.
+	 */
+	kvm_vcpu_on_spin(vcpu, true);
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
@@ -6769,6 +6907,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
+static int handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 static int handle_monitor_trap(struct kvm_vcpu *vcpu)
 {
 	return 1;
@@ -6985,7 +7129,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 		 * non-canonical form. This is the only check on the memory
 		 * destination for long mode!
 		 */
-		exn = is_noncanonical_address(*ret);
+		exn = is_noncanonical_address(*ret, vcpu);
 	} else if (is_protmode(vcpu)) {
 		/* Protected mode: apply checks for segment validity in the
 		 * following order:
@@ -7149,19 +7293,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	page = nested_get_page(vcpu, vmptr);
-	if (page == NULL) {
+	page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
+	if (is_error_page(page)) {
 		nested_vmx_failInvalid(vcpu);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 	if (*(u32 *)kmap(page) != VMCS12_REVISION) {
 		kunmap(page);
-		nested_release_page_clean(page);
+		kvm_release_page_clean(page);
 		nested_vmx_failInvalid(vcpu);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 	kunmap(page);
-	nested_release_page_clean(page);
+	kvm_release_page_clean(page);
 
 	vmx->nested.vmxon_ptr = vmptr;
 	ret = enter_vmx_operation(vcpu);
@@ -7242,16 +7386,16 @@ static void free_nested(struct vcpu_vmx *vmx)
 	kfree(vmx->nested.cached_vmcs12);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
-		nested_release_page(vmx->nested.apic_access_page);
+		kvm_release_page_dirty(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
 	if (vmx->nested.virtual_apic_page) {
-		nested_release_page(vmx->nested.virtual_apic_page);
+		kvm_release_page_dirty(vmx->nested.virtual_apic_page);
 		vmx->nested.virtual_apic_page = NULL;
 	}
 	if (vmx->nested.pi_desc_page) {
 		kunmap(vmx->nested.pi_desc_page);
-		nested_release_page(vmx->nested.pi_desc_page);
+		kvm_release_page_dirty(vmx->nested.pi_desc_page);
 		vmx->nested.pi_desc_page = NULL;
 		vmx->nested.pi_desc = NULL;
 	}
@@ -7618,15 +7762,15 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 	if (vmx->nested.current_vmptr != vmptr) {
 		struct vmcs12 *new_vmcs12;
 		struct page *page;
-		page = nested_get_page(vcpu, vmptr);
-		if (page == NULL) {
+		page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
+		if (is_error_page(page)) {
 			nested_vmx_failInvalid(vcpu);
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 		new_vmcs12 = kmap(page);
 		if (new_vmcs12->revision_id != VMCS12_REVISION) {
 			kunmap(page);
-			nested_release_page_clean(page);
+			kvm_release_page_clean(page);
 			nested_vmx_failValid(vcpu,
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 			return kvm_skip_emulated_instruction(vcpu);
@@ -7639,7 +7783,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		 */
 		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
 		kunmap(page);
-		nested_release_page_clean(page);
+		kvm_release_page_clean(page);
 
 		set_current_vmptr(vmx, vmptr);
 	}
@@ -7790,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 
 	switch (type) {
 	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
-		if (is_noncanonical_address(operand.gla)) {
+		if (is_noncanonical_address(operand.gla, vcpu)) {
 			nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 			return kvm_skip_emulated_instruction(vcpu);
@@ -7847,6 +7991,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	/* Check for memory type validity */
+	switch (address & VMX_EPTP_MT_MASK) {
+	case VMX_EPTP_MT_UC:
+		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+			return false;
+		break;
+	case VMX_EPTP_MT_WB:
+		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	/* only 4 levels page-walk length are valid */
+	if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
+		return false;
+
+	/* Reserved bits should not be set */
+	if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+		return false;
+
+	/* AD, if set, should be supported */
+	if (address & VMX_EPTP_AD_ENABLE_BIT) {
+		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+			return false;
+	}
+
+	return true;
+}
+
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+				     struct vmcs12 *vmcs12)
+{
+	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+	u64 address;
+	bool accessed_dirty;
+	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+	if (!nested_cpu_has_eptp_switching(vmcs12) ||
+	    !nested_cpu_has_ept(vmcs12))
+		return 1;
+
+	if (index >= VMFUNC_EPTP_ENTRIES)
+		return 1;
+
+
+	if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
+				     &address, index * 8, 8))
+		return 1;
+
+	accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
+
+	/*
+	 * If the (L2) guest does a vmfunc to the currently
+	 * active ept pointer, we don't have to do anything else
+	 */
+	if (vmcs12->ept_pointer != address) {
+		if (!valid_ept_address(vcpu, address))
+			return 1;
+
+		kvm_mmu_unload(vcpu);
+		mmu->ept_ad = accessed_dirty;
+		mmu->base_role.ad_disabled = !accessed_dirty;
+		vmcs12->ept_pointer = address;
+		/*
+		 * TODO: Check what's the correct approach in case
+		 * mmu reload fails. Currently, we just let the next
+		 * reload potentially fail
+		 */
+		kvm_mmu_reload(vcpu);
+	}
+
+	return 0;
+}
+
+static int handle_vmfunc(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12;
+	u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+
+	/*
+	 * VMFUNC is only supported for nested guests, but we always enable the
+	 * secondary control for simplicity; for non-nested mode, fake that we
+	 * didn't by injecting #UD.
+	 */
+	if (!is_guest_mode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmcs12 = get_vmcs12(vcpu);
+	if ((vmcs12->vm_function_control & (1 << function)) == 0)
+		goto fail;
+
+	switch (function) {
+	case 0:
+		if (nested_vmx_eptp_switching(vcpu, vmcs12))
+			goto fail;
+		break;
+	default:
+		goto fail;
+	}
+	return kvm_skip_emulated_instruction(vcpu);
+
+fail:
+	nested_vmx_vmexit(vcpu, vmx->exit_reason,
+			  vmcs_read32(VM_EXIT_INTR_INFO),
+			  vmcs_readl(EXIT_QUALIFICATION));
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7894,9 +8156,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_RDRAND]                  = handle_invalid_op,
+	[EXIT_REASON_RDSEED]                  = handle_invalid_op,
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_VMFUNC]                  = handle_vmfunc,
 	[EXIT_REASON_PREEMPTION_TIMER]	      = handle_preemption_timer,
 };
 
@@ -8212,6 +8477,10 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 		 * table is L0's fault.
 		 */
 		return false;
+	case EXIT_REASON_INVPCID:
+		return
+			nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
+			nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
@@ -8229,6 +8498,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 	case EXIT_REASON_PML_FULL:
 		/* We emulate PML support to L1. */
 		return false;
+	case EXIT_REASON_VMFUNC:
+		/* VM functions are emulated through L2->L0 vmexits. */
+		return false;
 	default:
 		return true;
 	}
@@ -8487,7 +8759,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 	trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
-	vcpu->arch.gpa_available = false;
 
 	/*
 	 * Flush logged GPAs PML buffer, this will make dirty_bitmap more
@@ -9341,11 +9612,6 @@ static void __init vmx_check_processor_compat(void *rtn)
 	}
 }
 
-static int get_ept_level(void)
-{
-	return VMX_EPT_DEFAULT_GAW + 1;
-}
-
 static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	u8 cache;
@@ -9462,39 +9728,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
-	struct kvm_cpuid_entry2 *best;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx);
 
-	if (vmx_rdtscp_supported()) {
-		bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu);
-		if (!rdtscp_enabled)
-			secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP;
-
-		if (nested) {
-			if (rdtscp_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
-					SECONDARY_EXEC_RDTSCP;
-			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
-					~SECONDARY_EXEC_RDTSCP;
-		}
+	if (cpu_has_secondary_exec_ctrls()) {
+		vmx_compute_secondary_exec_control(vmx);
+		vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
 	}
 
-	/* Exposing INVPCID only when PCID is exposed */
-	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
-	if (vmx_invpcid_supported() &&
-	    (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) ||
-	    !guest_cpuid_has_pcid(vcpu))) {
-		secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID;
-
-		if (best)
-			best->ebx &= ~bit(X86_FEATURE_INVPCID);
-	}
-
-	if (cpu_has_secondary_exec_ctrls())
-		vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
 	if (nested_vmx_allowed(vcpu))
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
 			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -9535,7 +9775,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 
 static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
 {
-	return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT;
+	return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
 }
 
 /* Callbacks for nested_ept_init_mmu_context: */
@@ -9548,18 +9788,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 
 static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-	bool wants_ad;
-
 	WARN_ON(mmu_is_nested(vcpu));
-	wants_ad = nested_ept_ad_enabled(vcpu);
-	if (wants_ad && !enable_ept_ad_bits)
+	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
 		return 1;
 
 	kvm_mmu_unload(vcpu);
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.nested_vmx_ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
-			wants_ad);
+			nested_ept_ad_enabled(vcpu));
 	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
 	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -9610,6 +9847,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 					struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct page *page;
 	u64 hpa;
 
 	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -9619,17 +9857,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 		 * physical address remains valid. We keep a reference
 		 * to it so we can release it later.
 		 */
-		if (vmx->nested.apic_access_page) /* shouldn't happen */
-			nested_release_page(vmx->nested.apic_access_page);
-		vmx->nested.apic_access_page =
-			nested_get_page(vcpu, vmcs12->apic_access_addr);
+		if (vmx->nested.apic_access_page) { /* shouldn't happen */
+			kvm_release_page_dirty(vmx->nested.apic_access_page);
+			vmx->nested.apic_access_page = NULL;
+		}
+		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
 		/*
 		 * If translation failed, no matter: This feature asks
 		 * to exit when accessing the given address, and if it
 		 * can never be accessed, this feature won't do
 		 * anything anyway.
 		 */
-		if (vmx->nested.apic_access_page) {
+		if (!is_error_page(page)) {
+			vmx->nested.apic_access_page = page;
 			hpa = page_to_phys(vmx->nested.apic_access_page);
 			vmcs_write64(APIC_ACCESS_ADDR, hpa);
 		} else {
@@ -9644,10 +9884,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		if (vmx->nested.virtual_apic_page) /* shouldn't happen */
-			nested_release_page(vmx->nested.virtual_apic_page);
-		vmx->nested.virtual_apic_page =
-			nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
+		if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
+			kvm_release_page_dirty(vmx->nested.virtual_apic_page);
+			vmx->nested.virtual_apic_page = NULL;
+		}
+		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
 
 		/*
 		 * If translation failed, VM entry will fail because
@@ -9662,7 +9903,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 		 * control.  But such a configuration is useless, so
 		 * let's keep the code simple.
 		 */
-		if (vmx->nested.virtual_apic_page) {
+		if (!is_error_page(page)) {
+			vmx->nested.virtual_apic_page = page;
 			hpa = page_to_phys(vmx->nested.virtual_apic_page);
 			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
 		}
@@ -9671,16 +9913,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
 			kunmap(vmx->nested.pi_desc_page);
-			nested_release_page(vmx->nested.pi_desc_page);
+			kvm_release_page_dirty(vmx->nested.pi_desc_page);
+			vmx->nested.pi_desc_page = NULL;
 		}
-		vmx->nested.pi_desc_page =
-			nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
-		vmx->nested.pi_desc =
-			(struct pi_desc *)kmap(vmx->nested.pi_desc_page);
-		if (!vmx->nested.pi_desc) {
-			nested_release_page_clean(vmx->nested.pi_desc_page);
+		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
+		if (is_error_page(page))
 			return;
-		}
+		vmx->nested.pi_desc_page = page;
+		vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
 		vmx->nested.pi_desc =
 			(struct pi_desc *)((void *)vmx->nested.pi_desc +
 			(unsigned long)(vmcs12->posted_intr_desc_addr &
@@ -9746,6 +9986,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+		return 0;
+
+	if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
+		return -EINVAL;
+
+	return 0;
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
@@ -9762,8 +10014,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
 		return false;
 
-	page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-	if (!page)
+	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
+	if (is_error_page(page))
 		return false;
 	msr_bitmap_l1 = (unsigned long *)kmap(page);
 
@@ -9793,7 +10045,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 		}
 	}
 	kunmap(page);
-	nested_release_page_clean(page);
+	kvm_release_page_clean(page);
 
 	return true;
 }
@@ -10187,13 +10439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		enable_ept ? vmcs12->page_fault_error_code_match : 0);
 
 	if (cpu_has_secondary_exec_ctrls()) {
-		exec_control = vmx_secondary_exec_control(vmx);
+		exec_control = vmx->secondary_exec_control;
 
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+				  SECONDARY_EXEC_ENABLE_INVPCID |
 				  SECONDARY_EXEC_RDTSCP |
+				  SECONDARY_EXEC_XSAVES |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				  SECONDARY_EXEC_ENABLE_VMFUNC);
 		if (nested_cpu_has(vmcs12,
 				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
 			vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
@@ -10201,6 +10456,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			exec_control |= vmcs12_exec_ctrl;
 		}
 
+		/* All VMFUNCs are currently emulated through L0 vmexits.  */
+		if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
+			vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
 		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
 			vmcs_write64(EOI_EXIT_BITMAP0,
 				vmcs12->eoi_exit_bitmap0);
@@ -10426,6 +10685,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -10453,6 +10715,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmx->nested.nested_vmx_entry_ctls_high))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_cpu_has_vmfunc(vmcs12)) {
+		if (vmcs12->vm_function_control &
+		    ~vmx->nested.nested_vmx_vmfunc_controls)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		if (nested_cpu_has_eptp_switching(vmcs12)) {
+			if (!nested_cpu_has_ept(vmcs12) ||
+			    !page_address_valid(vcpu, vmcs12->eptp_list_address))
+				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+		}
+	}
+
 	if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -10699,7 +10973,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	u32 idt_vectoring;
 	unsigned int nr;
 
-	if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
+	if (vcpu->arch.exception.injected) {
 		nr = vcpu->arch.exception.nr;
 		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 
@@ -10738,12 +11012,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long exit_qual;
 
-	if (vcpu->arch.exception.pending ||
-		vcpu->arch.nmi_injected ||
-		vcpu->arch.interrupt.pending)
+	if (kvm_event_needs_reinjection(vcpu))
 		return -EBUSY;
 
+	if (vcpu->arch.exception.pending &&
+		nested_vmx_check_exception(vcpu, &exit_qual)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+		vcpu->arch.exception.pending = false;
+		return 0;
+	}
+
 	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
 	    vmx->nested.preemption_timer_expired) {
 		if (vmx->nested.nested_run_pending)
@@ -11184,16 +11466,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
-		nested_release_page(vmx->nested.apic_access_page);
+		kvm_release_page_dirty(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
 	if (vmx->nested.virtual_apic_page) {
-		nested_release_page(vmx->nested.virtual_apic_page);
+		kvm_release_page_dirty(vmx->nested.virtual_apic_page);
 		vmx->nested.virtual_apic_page = NULL;
 	}
 	if (vmx->nested.pi_desc_page) {
 		kunmap(vmx->nested.pi_desc_page);
-		nested_release_page(vmx->nested.pi_desc_page);
+		kvm_release_page_dirty(vmx->nested.pi_desc_page);
 		vmx->nested.pi_desc_page = NULL;
 		vmx->nested.pi_desc = NULL;
 	}
@@ -11369,14 +11651,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
 
 		gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
 
-		page = nested_get_page(vcpu, vmcs12->pml_address);
-		if (!page)
+		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
+		if (is_error_page(page))
 			return 0;
 
 		pml_address = kmap(page);
 		pml_address[vmcs12->guest_pml_index--] = gpa;
 		kunmap(page);
-		nested_release_page_clean(page);
+		kvm_release_page_clean(page);
 	}
 
 	return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef5102f..6069af8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
 	u64 new_state = msr_info->data &
 		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
-		0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
+		(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
 
+	if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
+		return 1;
 	if (!msr_info->host_initiated &&
-	    ((msr_info->data & reserved_bits) != 0 ||
-	     new_state == X2APIC_ENABLE ||
-	     (new_state == MSR_IA32_APICBASE_ENABLE &&
+	    ((new_state == MSR_IA32_APICBASE_ENABLE &&
 	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
 	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
 	      old_state == 0)))
@@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-	if (!vcpu->arch.exception.pending) {
+	if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
 	queue:
 		if (has_error && !is_protmode(vcpu))
 			has_error = false;
-		vcpu->arch.exception.pending = true;
+		if (reinject) {
+			/*
+			 * On vmentry, vcpu->arch.exception.pending is only
+			 * true if an event injection was blocked by
+			 * nested_run_pending.  In that case, however,
+			 * vcpu_enter_guest requests an immediate exit,
+			 * and the guest shouldn't proceed far enough to
+			 * need reinjection.
+			 */
+			WARN_ON_ONCE(vcpu->arch.exception.pending);
+			vcpu->arch.exception.injected = true;
+		} else {
+			vcpu->arch.exception.pending = true;
+			vcpu->arch.exception.injected = false;
+		}
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
 		vcpu->arch.exception.error_code = error_code;
-		vcpu->arch.exception.reinject = reinject;
 		return;
 	}
 
@@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 	class2 = exception_class(nr);
 	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
 		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
-		/* generate double fault per SDM Table 5-5 */
+		/*
+		 * Generate double fault per SDM Table 5-5.  Set
+		 * exception.pending = true so that the double fault
+		 * can trigger a nested vmexit.
+		 */
 		vcpu->arch.exception.pending = true;
+		vcpu->arch.exception.injected = false;
 		vcpu->arch.exception.has_error_code = true;
 		vcpu->arch.exception.nr = DF_VECTOR;
 		vcpu->arch.exception.error_code = 0;
@@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
-	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
 		return 1;
 
-	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
-	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
 		return 1;
 
-	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
-	if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
+		return 1;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
 		return 1;
 
 	if (is_long_mode(vcpu)) {
@@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return 1;
 
 	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
-		if (!guest_cpuid_has_pcid(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
 			return 1;
 
 		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
@@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 0;
 	}
 
-	if (is_long_mode(vcpu)) {
-		if (cr3 & CR3_L_MODE_RESERVED_BITS)
-			return 1;
-	} else if (is_pae(vcpu) && is_paging(vcpu) &&
+	if (is_long_mode(vcpu) &&
+	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+		return 1;
+	else if (is_pae(vcpu) && is_paging(vcpu) &&
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
@@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 {
 	u64 fixed = DR6_FIXED_1;
 
-	if (!guest_cpuid_has_rtm(vcpu))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
 		fixed |= DR6_RTM;
 	return fixed;
 }
@@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = {
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+	HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
 	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
 	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_RESET,
@@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 	if (efer & efer_reserved_bits)
 		return false;
 
-	if (efer & EFER_FFXSR) {
-		struct kvm_cpuid_entry2 *feat;
-
-		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
+	if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
 			return false;
-	}
 
-	if (efer & EFER_SVME) {
-		struct kvm_cpuid_entry2 *feat;
-
-		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
+	if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
 			return false;
-	}
 
 	return true;
 }
@@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
-		if (is_noncanonical_address(msr->data))
+		if (is_noncanonical_address(msr->data, vcpu))
 			return 1;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
@@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * value, and that something deterministic happens if the guest
 		 * invokes 64-bit SYSENTER.
 		 */
-		msr->data = get_canonical(msr->data);
+		msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
 	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
@@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
 	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
-	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+	if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
 		update_ia32_tsc_adjust_msr(vcpu, offset);
+
 	kvm_vcpu_write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
@@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		kvm_set_lapic_tscdeadline_msr(vcpu, data);
 		break;
 	case MSR_IA32_TSC_ADJUST:
-		if (guest_cpuid_has_tsc_adjust(vcpu)) {
+		if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
 			if (!msr_info->host_initiated) {
 				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				adjust_tsc_offset_guest(vcpu, adj);
@@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		vcpu->arch.osvw.length = data;
 		break;
 	case MSR_AMD64_OSVW_STATUS:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		vcpu->arch.osvw.status = data;
 		break;
@@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0xbe702111;
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		msr_info->data = vcpu->arch.osvw.length;
 		break;
 	case MSR_AMD64_OSVW_STATUS:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		msr_info->data = vcpu->arch.osvw.status;
 		break;
@@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
+
+	if (vcpu->preempted)
+		vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+
 	/*
 	 * Disable page faults because we're in atomic context here.
 	 * kvm_write_guest_offset_cached() would call might_fault()
@@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 					       struct kvm_vcpu_events *events)
 {
 	process_nmi(vcpu);
+	/*
+	 * FIXME: pass injected and pending separately.  This is only
+	 * needed for nested virtualization, whose state cannot be
+	 * migrated yet.  For now we can combine them.
+	 */
 	events->exception.injected =
-		vcpu->arch.exception.pending &&
+		(vcpu->arch.exception.pending ||
+		 vcpu->arch.exception.injected) &&
 		!kvm_exception_is_soft(vcpu->arch.exception.nr);
 	events->exception.nr = vcpu->arch.exception.nr;
 	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
@@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	process_nmi(vcpu);
+	vcpu->arch.exception.injected = false;
 	vcpu->arch.exception.pending = events->exception.injected;
 	vcpu->arch.exception.nr = events->exception.nr;
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 	 */
 	if (vcpu->arch.gpa_available &&
 	    emulator_can_use_gpa(ctxt) &&
-	    vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
-	    (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
-		gpa = exception->address;
-		goto mmio;
+	    (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
+		gpa = vcpu->arch.gpa_val;
+		ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+	} else {
+		ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
+		if (ret < 0)
+			return X86EMUL_PROPAGATE_FAULT;
 	}
 
-	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
-	if (ret < 0)
-		return X86EMUL_PROPAGATE_FAULT;
-
-	/* For APIC access vmexit */
-	if (ret)
-		goto mmio;
-
-	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
+	if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
-mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
@@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
 	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
-static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
-			       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+			u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
 {
-	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
+	return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
 }
 
 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
@@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	int r;
 
 	/* try to reinject previous events if any */
+	if (vcpu->arch.exception.injected) {
+		kvm_x86_ops->queue_exception(vcpu);
+		return 0;
+	}
+
+	/*
+	 * Exceptions must be injected immediately, or the exception
+	 * frame will have the address of the NMI or interrupt handler.
+	 */
+	if (!vcpu->arch.exception.pending) {
+		if (vcpu->arch.nmi_injected) {
+			kvm_x86_ops->set_nmi(vcpu);
+			return 0;
+		}
+
+		if (vcpu->arch.interrupt.pending) {
+			kvm_x86_ops->set_irq(vcpu);
+			return 0;
+		}
+	}
+
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+		if (r != 0)
+			return r;
+	}
+
+	/* try to inject new event if pending */
 	if (vcpu->arch.exception.pending) {
 		trace_kvm_inj_exception(vcpu->arch.exception.nr,
 					vcpu->arch.exception.has_error_code,
 					vcpu->arch.exception.error_code);
 
+		vcpu->arch.exception.pending = false;
+		vcpu->arch.exception.injected = true;
+
 		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
@@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 		}
 
 		kvm_x86_ops->queue_exception(vcpu);
-		return 0;
-	}
-
-	if (vcpu->arch.nmi_injected) {
-		kvm_x86_ops->set_nmi(vcpu);
-		return 0;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu);
-		return 0;
-	}
-
-	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
-		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
-		if (r != 0)
-			return r;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+	} else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
 		vcpu->arch.smi_pending = false;
 		enter_smm(vcpu);
 	} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
 	vcpu->arch.hflags |= HF_SMM_MASK;
 	memset(buf, 0, 512);
-	if (guest_cpuid_has_longmode(vcpu))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		enter_smm_save_state_64(vcpu, buf);
 	else
 		enter_smm_save_state_32(vcpu, buf);
@@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
 	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
 
-	if (guest_cpuid_has_longmode(vcpu))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		kvm_x86_ops->set_efer(vcpu, 0);
 
 	kvm_update_cpuid(vcpu);
@@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+			vcpu->mmio_needed = 0;
 			r = 0;
 			goto out;
 		}
@@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				kvm_x86_ops->enable_nmi_window(vcpu);
 			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 				kvm_x86_ops->enable_irq_window(vcpu);
+			WARN_ON(vcpu->arch.exception.pending);
 		}
 
 		if (kvm_lapic_enabled(vcpu)) {
@@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.apic_attention)
 		kvm_lapic_sync_from_vapic(vcpu);
 
+	vcpu->arch.gpa_available = false;
 	r = kvm_x86_ops->handle_exit(vcpu);
 	return r;
 
@@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
 
-	if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+			(sregs->cr4 & X86_CR4_OSXSAVE))
+		return -EINVAL;
+
+	apic_base_msr.data = sregs->apic_base;
+	apic_base_msr.host_initiated = true;
+	if (kvm_set_apic_base(vcpu, &apic_base_msr))
 		return -EINVAL;
 
 	dt.size = sregs->idt.limit;
@@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
 	kvm_x86_ops->set_efer(vcpu, sregs->efer);
-	apic_base_msr.data = sregs->apic_base;
-	apic_base_msr.host_initiated = true;
-	kvm_set_apic_base(vcpu, &apic_base_msr);
 
 	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.nmi_injected = false;
 	kvm_clear_interrupt_queue(vcpu);
 	kvm_clear_exception_queue(vcpu);
+	vcpu->arch.exception.pending = false;
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	kvm_update_dr0123(vcpu);
@@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_pmu_init(vcpu);
 
 	vcpu->arch.pending_external_vector = -1;
+	vcpu->arch.preempted_in_kernel = false;
 
 	kvm_hv_vcpu_init(vcpu);
 
@@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.preempted_in_kernel;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6120670..51e349c 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -11,7 +11,7 @@
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.exception.pending = false;
+	vcpu->arch.exception.injected = false;
 }
 
 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
@@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+	return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
 
@@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
 	return cs_l;
 }
 
+static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	return (vcpu->arch.efer & EFER_LMA) &&
+		 kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+#else
+	return 0;
+#endif
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
@@ -87,10 +97,48 @@ static inline u32 bit(int bitno)
 	return 1 << (bitno & 31);
 }
 
+static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+}
+
+static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
+{
+	return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
+#else
+	return false;
+#endif
+}
+
+static inline bool emul_is_noncanonical_address(u64 la,
+						struct x86_emulate_ctxt *ctxt)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
+#else
+	return false;
+#endif
+}
+
 static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 					gva_t gva, gfn_t gfn, unsigned access)
 {
-	vcpu->arch.mmio_gva = gva & PAGE_MASK;
+	/*
+	 * If this is a shadow nested page table, the "GVA" is
+	 * actually a nGPA.
+	 */
+	vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
 	vcpu->arch.access = access;
 	vcpu->arch.mmio_gfn = gfn;
 	vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 11e4074..f2228b1 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -618,3 +618,20 @@ static void quirk_apple_mbp_poweroff(struct pci_dev *pdev)
 		dev_info(dev, "can't work around MacBook Pro poweroff issue\n");
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff);
+
+/*
+ * VMD-enabled root ports will change the source ID for all messages
+ * to the VMD device. Rather than doing device matching with the source
+ * ID, the AER driver should traverse the child device tree, reading
+ * AER registers to find the faulting device.
+ */
+static void quirk_no_aersid(struct pci_dev *pdev)
+{
+	/* VMD Domain */
+	if (is_vmd(pdev->bus))
+		pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 3e8636f..bb05fc5 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -901,6 +901,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 		struct pci_dev *pf_pdev;
 
 		pdev = to_pci_dev(dev);
+
+#ifdef CONFIG_X86
+		/* VMD child devices currently cannot be handled individually */
+		if (is_vmd(pdev->bus))
+			return NULL;
+#endif
+
 		/* VFs aren't listed in scope tables; we need to look up
 		 * the PF instead to find the IOMMU. */
 		pf_pdev = pci_physfn(pdev);
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 09c10f4..deb2030 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -72,6 +72,11 @@ static DEFINE_IDA(pci_endpoint_test_ida);
 
 #define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \
 					    miscdev)
+
+static bool no_msi;
+module_param(no_msi, bool, 0444);
+MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test");
+
 enum pci_barno {
 	BAR_0,
 	BAR_1,
@@ -90,9 +95,15 @@ struct pci_endpoint_test {
 	/* mutex to protect the ioctls */
 	struct mutex	mutex;
 	struct miscdevice miscdev;
+	enum pci_barno test_reg_bar;
+	size_t alignment;
 };
 
-static int bar_size[] = { 4, 512, 1024, 16384, 131072, 1048576 };
+struct pci_endpoint_test_data {
+	enum pci_barno test_reg_bar;
+	size_t alignment;
+	bool no_msi;
+};
 
 static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test,
 					  u32 offset)
@@ -141,11 +152,15 @@ static bool pci_endpoint_test_bar(struct pci_endpoint_test *test,
 	int j;
 	u32 val;
 	int size;
+	struct pci_dev *pdev = test->pdev;
 
 	if (!test->bar[barno])
 		return false;
 
-	size = bar_size[barno];
+	size = pci_resource_len(pdev, barno);
+
+	if (barno == test->test_reg_bar)
+		size = 0x4;
 
 	for (j = 0; j < size; j += 4)
 		pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0);
@@ -202,16 +217,32 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
 	dma_addr_t dst_phys_addr;
 	struct pci_dev *pdev = test->pdev;
 	struct device *dev = &pdev->dev;
+	void *orig_src_addr;
+	dma_addr_t orig_src_phys_addr;
+	void *orig_dst_addr;
+	dma_addr_t orig_dst_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
 	u32 src_crc32;
 	u32 dst_crc32;
 
-	src_addr = dma_alloc_coherent(dev, size, &src_phys_addr, GFP_KERNEL);
-	if (!src_addr) {
+	orig_src_addr = dma_alloc_coherent(dev, size + alignment,
+					   &orig_src_phys_addr, GFP_KERNEL);
+	if (!orig_src_addr) {
 		dev_err(dev, "failed to allocate source buffer\n");
 		ret = false;
 		goto err;
 	}
 
+	if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) {
+		src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment);
+		offset = src_phys_addr - orig_src_phys_addr;
+		src_addr = orig_src_addr + offset;
+	} else {
+		src_phys_addr = orig_src_phys_addr;
+		src_addr = orig_src_addr;
+	}
+
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR,
 				 lower_32_bits(src_phys_addr));
 
@@ -221,11 +252,21 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
 	get_random_bytes(src_addr, size);
 	src_crc32 = crc32_le(~0, src_addr, size);
 
-	dst_addr = dma_alloc_coherent(dev, size, &dst_phys_addr, GFP_KERNEL);
-	if (!dst_addr) {
+	orig_dst_addr = dma_alloc_coherent(dev, size + alignment,
+					   &orig_dst_phys_addr, GFP_KERNEL);
+	if (!orig_dst_addr) {
 		dev_err(dev, "failed to allocate destination address\n");
 		ret = false;
-		goto err_src_addr;
+		goto err_orig_src_addr;
+	}
+
+	if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) {
+		dst_phys_addr = PTR_ALIGN(orig_dst_phys_addr, alignment);
+		offset = dst_phys_addr - orig_dst_phys_addr;
+		dst_addr = orig_dst_addr + offset;
+	} else {
+		dst_phys_addr = orig_dst_phys_addr;
+		dst_addr = orig_dst_addr;
 	}
 
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR,
@@ -245,10 +286,12 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
 	if (dst_crc32 == src_crc32)
 		ret = true;
 
-	dma_free_coherent(dev, size, dst_addr, dst_phys_addr);
+	dma_free_coherent(dev, size + alignment, orig_dst_addr,
+			  orig_dst_phys_addr);
 
-err_src_addr:
-	dma_free_coherent(dev, size, src_addr, src_phys_addr);
+err_orig_src_addr:
+	dma_free_coherent(dev, size + alignment, orig_src_addr,
+			  orig_src_phys_addr);
 
 err:
 	return ret;
@@ -262,15 +305,29 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
 	dma_addr_t phys_addr;
 	struct pci_dev *pdev = test->pdev;
 	struct device *dev = &pdev->dev;
+	void *orig_addr;
+	dma_addr_t orig_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
 	u32 crc32;
 
-	addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL);
-	if (!addr) {
+	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
+				       GFP_KERNEL);
+	if (!orig_addr) {
 		dev_err(dev, "failed to allocate address\n");
 		ret = false;
 		goto err;
 	}
 
+	if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
+		phys_addr =  PTR_ALIGN(orig_phys_addr, alignment);
+		offset = phys_addr - orig_phys_addr;
+		addr = orig_addr + offset;
+	} else {
+		phys_addr = orig_phys_addr;
+		addr = orig_addr;
+	}
+
 	get_random_bytes(addr, size);
 
 	crc32 = crc32_le(~0, addr, size);
@@ -293,7 +350,7 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
 	if (reg & STATUS_READ_SUCCESS)
 		ret = true;
 
-	dma_free_coherent(dev, size, addr, phys_addr);
+	dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
 
 err:
 	return ret;
@@ -306,15 +363,29 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
 	dma_addr_t phys_addr;
 	struct pci_dev *pdev = test->pdev;
 	struct device *dev = &pdev->dev;
+	void *orig_addr;
+	dma_addr_t orig_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
 	u32 crc32;
 
-	addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL);
-	if (!addr) {
+	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
+				       GFP_KERNEL);
+	if (!orig_addr) {
 		dev_err(dev, "failed to allocate destination address\n");
 		ret = false;
 		goto err;
 	}
 
+	if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
+		phys_addr = PTR_ALIGN(orig_phys_addr, alignment);
+		offset = phys_addr - orig_phys_addr;
+		addr = orig_addr + offset;
+	} else {
+		phys_addr = orig_phys_addr;
+		addr = orig_addr;
+	}
+
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR,
 				 lower_32_bits(phys_addr));
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR,
@@ -331,7 +402,7 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
 	if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM))
 		ret = true;
 
-	dma_free_coherent(dev, size, addr, phys_addr);
+	dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
 err:
 	return ret;
 }
@@ -383,13 +454,15 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 {
 	int i;
 	int err;
-	int irq;
+	int irq = 0;
 	int id;
 	char name[20];
 	enum pci_barno bar;
 	void __iomem *base;
 	struct device *dev = &pdev->dev;
 	struct pci_endpoint_test *test;
+	struct pci_endpoint_test_data *data;
+	enum pci_barno test_reg_bar = BAR_0;
 	struct miscdevice *misc_device;
 
 	if (pci_is_bridge(pdev))
@@ -399,7 +472,17 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 	if (!test)
 		return -ENOMEM;
 
+	test->test_reg_bar = 0;
+	test->alignment = 0;
 	test->pdev = pdev;
+
+	data = (struct pci_endpoint_test_data *)ent->driver_data;
+	if (data) {
+		test_reg_bar = data->test_reg_bar;
+		test->alignment = data->alignment;
+		no_msi = data->no_msi;
+	}
+
 	init_completion(&test->irq_raised);
 	mutex_init(&test->mutex);
 
@@ -417,9 +500,11 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
-	if (irq < 0)
-		dev_err(dev, "failed to get MSI interrupts\n");
+	if (!no_msi) {
+		irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
+		if (irq < 0)
+			dev_err(dev, "failed to get MSI interrupts\n");
+	}
 
 	err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler,
 			       IRQF_SHARED, DRV_MODULE_NAME, test);
@@ -441,14 +526,15 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 		base = pci_ioremap_bar(pdev, bar);
 		if (!base) {
 			dev_err(dev, "failed to read BAR%d\n", bar);
-			WARN_ON(bar == BAR_0);
+			WARN_ON(bar == test_reg_bar);
 		}
 		test->bar[bar] = base;
 	}
 
-	test->base = test->bar[0];
+	test->base = test->bar[test_reg_bar];
 	if (!test->base) {
-		dev_err(dev, "Cannot perform PCI test without BAR0\n");
+		dev_err(dev, "Cannot perform PCI test without BAR%d\n",
+			test_reg_bar);
 		goto err_iounmap;
 	}
 
diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig
index d275aad..22ec82f 100644
--- a/drivers/pci/dwc/Kconfig
+++ b/drivers/pci/dwc/Kconfig
@@ -25,7 +25,7 @@
 	 work either as EP or RC. In order to enable host-specific features
 	 PCI_DRA7XX_HOST must be selected and in order to enable device-
 	 specific features PCI_DRA7XX_EP must be selected. This uses
-	 the Designware core.
+	 the DesignWare core.
 
 if PCI_DRA7XX
 
@@ -97,8 +97,8 @@
 	select PCIE_DW_HOST
 	help
 	  Say Y here if you want to enable PCI controller support on Keystone
-	  SoCs. The PCI controller on Keystone is based on Designware hardware
-	  and therefore the driver re-uses the Designware core functions to
+	  SoCs. The PCI controller on Keystone is based on DesignWare hardware
+	  and therefore the driver re-uses the DesignWare core functions to
 	  implement the driver.
 
 config PCI_LAYERSCAPE
@@ -132,7 +132,7 @@
 	select PCIE_DW_HOST
 	help
 	  Say Y here to enable PCIe controller support on Qualcomm SoCs. The
-	  PCIe controller uses the Designware core plus Qualcomm-specific
+	  PCIe controller uses the DesignWare core plus Qualcomm-specific
 	  hardware wrappers.
 
 config PCIE_ARMADA_8K
@@ -145,8 +145,8 @@
 	help
 	  Say Y here if you want to enable PCIe controller support on
 	  Armada-8K SoCs. The PCIe controller on Armada-8K is based on
-	  Designware hardware and therefore the driver re-uses the
-	  Designware core functions to implement the driver.
+	  DesignWare hardware and therefore the driver re-uses the
+	  DesignWare core functions to implement the driver.
 
 config PCIE_ARTPEC6
 	bool "Axis ARTPEC-6 PCIe controller"
diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c
index f2fc5f4..34427a6 100644
--- a/drivers/pci/dwc/pci-dra7xx.c
+++ b/drivers/pci/dwc/pci-dra7xx.c
@@ -195,7 +195,7 @@ static void dra7xx_pcie_enable_interrupts(struct dra7xx_pcie *dra7xx)
 	dra7xx_pcie_enable_msi_interrupts(dra7xx);
 }
 
-static void dra7xx_pcie_host_init(struct pcie_port *pp)
+static int dra7xx_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
@@ -206,6 +206,8 @@ static void dra7xx_pcie_host_init(struct pcie_port *pp)
 	dw_pcie_wait_for_link(pci);
 	dw_pcie_msi_init(pp);
 	dra7xx_pcie_enable_interrupts(dra7xx);
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = {
@@ -238,7 +240,7 @@ static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
 		return -ENODEV;
 	}
 
-	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, 4,
+	dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
 						   &intx_domain_ops, pp);
 	if (!dra7xx->irq_domain) {
 		dev_err(dev, "Failed to get a INTx IRQ domain\n");
@@ -275,7 +277,6 @@ static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-
 static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
 {
 	struct dra7xx_pcie *dra7xx = arg;
@@ -335,10 +336,23 @@ static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar)
+{
+	u32 reg;
+
+	reg = PCI_BASE_ADDRESS_0 + (4 * bar);
+	dw_pcie_writel_dbi2(pci, reg, 0x0);
+	dw_pcie_writel_dbi(pci, reg, 0x0);
+}
+
 static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
+	enum pci_barno bar;
+
+	for (bar = BAR_0; bar <= BAR_5; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
 
 	dra7xx_pcie_enable_wrapper_interrupts(dra7xx);
 }
@@ -435,7 +449,7 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx,
 	pp->irq = platform_get_irq(pdev, 1);
 	if (pp->irq < 0) {
 		dev_err(dev, "missing IRQ resource\n");
-		return -EINVAL;
+		return pp->irq;
 	}
 
 	ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler,
@@ -616,8 +630,8 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(dev, "missing IRQ resource\n");
-		return -EINVAL;
+		dev_err(dev, "missing IRQ resource: %d\n", irq);
+		return irq;
 	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ti_conf");
diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c
index c78c065..5596fde 100644
--- a/drivers/pci/dwc/pci-exynos.c
+++ b/drivers/pci/dwc/pci-exynos.c
@@ -581,13 +581,15 @@ static int exynos_pcie_link_up(struct dw_pcie *pci)
 	return 0;
 }
 
-static void exynos_pcie_host_init(struct pcie_port *pp)
+static int exynos_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct exynos_pcie *ep = to_exynos_pcie(pci);
 
 	exynos_pcie_establish_link(ep);
 	exynos_pcie_enable_interrupts(ep);
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops exynos_pcie_host_ops = {
@@ -605,9 +607,9 @@ static int __init exynos_add_pcie_port(struct exynos_pcie *ep,
 	int ret;
 
 	pp->irq = platform_get_irq(pdev, 1);
-	if (!pp->irq) {
+	if (pp->irq < 0) {
 		dev_err(dev, "failed to get irq\n");
-		return -ENODEV;
+		return pp->irq;
 	}
 	ret = devm_request_irq(dev, pp->irq, exynos_pcie_irq_handler,
 				IRQF_SHARED, "exynos-pcie", ep);
@@ -618,9 +620,9 @@ static int __init exynos_add_pcie_port(struct exynos_pcie *ep,
 
 	if (IS_ENABLED(CONFIG_PCI_MSI)) {
 		pp->msi_irq = platform_get_irq(pdev, 0);
-		if (!pp->msi_irq) {
+		if (pp->msi_irq < 0) {
 			dev_err(dev, "failed to get msi irq\n");
-			return -ENODEV;
+			return pp->msi_irq;
 		}
 
 		ret = devm_request_irq(dev, pp->msi_irq,
diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c
index bf5c361..b734835 100644
--- a/drivers/pci/dwc/pci-imx6.c
+++ b/drivers/pci/dwc/pci-imx6.c
@@ -636,7 +636,7 @@ static int imx6_pcie_establish_link(struct imx6_pcie *imx6_pcie)
 	return ret;
 }
 
-static void imx6_pcie_host_init(struct pcie_port *pp)
+static int imx6_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci);
@@ -649,6 +649,8 @@ static void imx6_pcie_host_init(struct pcie_port *pp)
 
 	if (IS_ENABLED(CONFIG_PCI_MSI))
 		dw_pcie_msi_init(pp);
+
+	return 0;
 }
 
 static int imx6_pcie_link_up(struct dw_pcie *pci)
@@ -778,14 +780,15 @@ static int imx6_pcie_probe(struct platform_device *pdev)
 		}
 		break;
 	case IMX7D:
-		imx6_pcie->pciephy_reset = devm_reset_control_get(dev,
-								  "pciephy");
+		imx6_pcie->pciephy_reset = devm_reset_control_get_exclusive(dev,
+									    "pciephy");
 		if (IS_ERR(imx6_pcie->pciephy_reset)) {
 			dev_err(dev, "Failed to get PCIEPHY reset control\n");
 			return PTR_ERR(imx6_pcie->pciephy_reset);
 		}
 
-		imx6_pcie->apps_reset = devm_reset_control_get(dev, "apps");
+		imx6_pcie->apps_reset = devm_reset_control_get_exclusive(dev,
+									 "apps");
 		if (IS_ERR(imx6_pcie->apps_reset)) {
 			dev_err(dev, "Failed to get PCIE APPS reset control\n");
 			return PTR_ERR(imx6_pcie->apps_reset);
diff --git a/drivers/pci/dwc/pci-keystone-dw.c b/drivers/pci/dwc/pci-keystone-dw.c
index 8bc626e..2fb20b8 100644
--- a/drivers/pci/dwc/pci-keystone-dw.c
+++ b/drivers/pci/dwc/pci-keystone-dw.c
@@ -1,5 +1,5 @@
 /*
- * Designware application register space functions for Keystone PCI controller
+ * DesignWare application register space functions for Keystone PCI controller
  *
  * Copyright (C) 2013-2014 Texas Instruments., Ltd.
  *		http://www.ti.com
@@ -168,16 +168,12 @@ void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq)
 
 static void ks_dw_pcie_msi_irq_mask(struct irq_data *d)
 {
-	struct keystone_pcie *ks_pcie;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
-	struct dw_pcie *pci;
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
 	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
-	pci = to_dw_pcie_from_pp(pp);
-	ks_pcie = to_keystone_pcie(pci);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
 	/* Mask the end point if PVM implemented */
@@ -191,16 +187,12 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d)
 
 static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d)
 {
-	struct keystone_pcie *ks_pcie;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
-	struct dw_pcie *pci;
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
 	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
-	pci = to_dw_pcie_from_pp(pp);
-	ks_pcie = to_keystone_pcie(pci);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
 	/* Mask the end point if PVM implemented */
@@ -259,7 +251,7 @@ void ks_dw_pcie_enable_legacy_irqs(struct keystone_pcie *ks_pcie)
 {
 	int i;
 
-	for (i = 0; i < MAX_LEGACY_IRQS; i++)
+	for (i = 0; i < PCI_NUM_INTX; i++)
 		ks_dw_app_writel(ks_pcie, IRQ_ENABLE_SET + (i << 4), 0x1);
 }
 
@@ -565,7 +557,7 @@ int __init ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie,
 	/* Create legacy IRQ domain */
 	ks_pcie->legacy_irq_domain =
 			irq_domain_add_linear(ks_pcie->legacy_intc_np,
-					MAX_LEGACY_IRQS,
+					PCI_NUM_INTX,
 					&ks_dw_pcie_legacy_irq_domain_ops,
 					NULL);
 	if (!ks_pcie->legacy_irq_domain) {
diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c
index 4783cec1..5bee3af 100644
--- a/drivers/pci/dwc/pci-keystone.c
+++ b/drivers/pci/dwc/pci-keystone.c
@@ -32,10 +32,6 @@
 
 #define DRIVER_NAME	"keystone-pcie"
 
-/* driver specific constants */
-#define MAX_MSI_HOST_IRQS		8
-#define MAX_LEGACY_HOST_IRQS		4
-
 /* DEV_STAT_CTRL */
 #define PCIE_CAP_BASE		0x70
 
@@ -173,7 +169,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie,
 
 	if (legacy) {
 		np_temp = &ks_pcie->legacy_intc_np;
-		max_host_irqs = MAX_LEGACY_HOST_IRQS;
+		max_host_irqs = PCI_NUM_INTX;
 		host_irqs = &ks_pcie->legacy_host_irqs[0];
 	} else {
 		np_temp = &ks_pcie->msi_intc_np;
@@ -261,7 +257,7 @@ static int keystone_pcie_fault(unsigned long addr, unsigned int fsr,
 	return 0;
 }
 
-static void __init ks_pcie_host_init(struct pcie_port *pp)
+static int __init ks_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct keystone_pcie *ks_pcie = to_keystone_pcie(pci);
@@ -289,6 +285,8 @@ static void __init ks_pcie_host_init(struct pcie_port *pp)
 	 */
 	hook_fault_code(17, keystone_pcie_fault, SIGBUS, 0,
 			"Asynchronous external abort");
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops keystone_pcie_host_ops = {
diff --git a/drivers/pci/dwc/pci-keystone.h b/drivers/pci/dwc/pci-keystone.h
index 74c5825..30b7bc2 100644
--- a/drivers/pci/dwc/pci-keystone.h
+++ b/drivers/pci/dwc/pci-keystone.h
@@ -12,9 +12,7 @@
  * published by the Free Software Foundation.
  */
 
-#define MAX_LEGACY_IRQS			4
 #define MAX_MSI_HOST_IRQS		8
-#define MAX_LEGACY_HOST_IRQS		4
 
 struct keystone_pcie {
 	struct dw_pcie		*pci;
@@ -22,7 +20,7 @@ struct keystone_pcie {
 	/* PCI Device ID */
 	u32			device_id;
 	int			num_legacy_host_irqs;
-	int			legacy_host_irqs[MAX_LEGACY_HOST_IRQS];
+	int			legacy_host_irqs[PCI_NUM_INTX];
 	struct			device_node *legacy_intc_np;
 
 	int			num_msi_host_irqs;
diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c
index fd86128..87fa486 100644
--- a/drivers/pci/dwc/pci-layerscape.c
+++ b/drivers/pci/dwc/pci-layerscape.c
@@ -33,7 +33,8 @@
 
 /* PEX Internal Configuration Registers */
 #define PCIE_STRFMR1		0x71c /* Symbol Timer & Filter Mask Register1 */
-#define PCIE_DBI_RO_WR_EN	0x8bc /* DBI Read-Only Write Enable Register */
+
+#define PCIE_IATU_NUM		6
 
 struct ls_pcie_drvdata {
 	u32 lut_offset;
@@ -72,14 +73,6 @@ static void ls_pcie_clear_multifunction(struct ls_pcie *pcie)
 	iowrite8(PCI_HEADER_TYPE_BRIDGE, pci->dbi_base + PCI_HEADER_TYPE);
 }
 
-/* Fix class value */
-static void ls_pcie_fix_class(struct ls_pcie *pcie)
-{
-	struct dw_pcie *pci = pcie->pci;
-
-	iowrite16(PCI_CLASS_BRIDGE_PCI, pci->dbi_base + PCI_CLASS_DEVICE);
-}
-
 /* Drop MSG TLP except for Vendor MSG */
 static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
 {
@@ -91,6 +84,14 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
 	iowrite32(val, pci->dbi_base + PCIE_STRFMR1);
 }
 
+static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
+{
+	int i;
+
+	for (i = 0; i < PCIE_IATU_NUM; i++)
+		dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
+}
+
 static int ls1021_pcie_link_up(struct dw_pcie *pci)
 {
 	u32 state;
@@ -108,33 +109,6 @@ static int ls1021_pcie_link_up(struct dw_pcie *pci)
 	return 1;
 }
 
-static void ls1021_pcie_host_init(struct pcie_port *pp)
-{
-	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
-	struct ls_pcie *pcie = to_ls_pcie(pci);
-	struct device *dev = pci->dev;
-	u32 index[2];
-
-	pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node,
-						     "fsl,pcie-scfg");
-	if (IS_ERR(pcie->scfg)) {
-		dev_err(dev, "No syscfg phandle specified\n");
-		pcie->scfg = NULL;
-		return;
-	}
-
-	if (of_property_read_u32_array(dev->of_node,
-				       "fsl,pcie-scfg", index, 2)) {
-		pcie->scfg = NULL;
-		return;
-	}
-	pcie->index = index[1];
-
-	dw_pcie_setup_rc(pp);
-
-	ls_pcie_drop_msg_tlp(pcie);
-}
-
 static int ls_pcie_link_up(struct dw_pcie *pci)
 {
 	struct ls_pcie *pcie = to_ls_pcie(pci);
@@ -150,16 +124,54 @@ static int ls_pcie_link_up(struct dw_pcie *pci)
 	return 1;
 }
 
-static void ls_pcie_host_init(struct pcie_port *pp)
+static int ls_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct ls_pcie *pcie = to_ls_pcie(pci);
 
-	iowrite32(1, pci->dbi_base + PCIE_DBI_RO_WR_EN);
-	ls_pcie_fix_class(pcie);
+	/*
+	 * Disable outbound windows configured by the bootloader to avoid
+	 * one transaction hitting multiple outbound windows.
+	 * dw_pcie_setup_rc() will reconfigure the outbound windows.
+	 */
+	ls_pcie_disable_outbound_atus(pcie);
+
+	dw_pcie_dbi_ro_wr_en(pci);
 	ls_pcie_clear_multifunction(pcie);
+	dw_pcie_dbi_ro_wr_dis(pci);
+
 	ls_pcie_drop_msg_tlp(pcie);
-	iowrite32(0, pci->dbi_base + PCIE_DBI_RO_WR_EN);
+
+	dw_pcie_setup_rc(pp);
+
+	return 0;
+}
+
+static int ls1021_pcie_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct ls_pcie *pcie = to_ls_pcie(pci);
+	struct device *dev = pci->dev;
+	u32 index[2];
+	int ret;
+
+	pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node,
+						     "fsl,pcie-scfg");
+	if (IS_ERR(pcie->scfg)) {
+		ret = PTR_ERR(pcie->scfg);
+		dev_err(dev, "No syscfg phandle specified\n");
+		pcie->scfg = NULL;
+		return ret;
+	}
+
+	if (of_property_read_u32_array(dev->of_node,
+				       "fsl,pcie-scfg", index, 2)) {
+		pcie->scfg = NULL;
+		return -EINVAL;
+	}
+	pcie->index = index[1];
+
+	return ls_pcie_host_init(pp);
 }
 
 static int ls_pcie_msi_host_init(struct pcie_port *pp,
@@ -232,12 +244,22 @@ static struct ls_pcie_drvdata ls2080_drvdata = {
 	.dw_pcie_ops = &dw_ls_pcie_ops,
 };
 
+static struct ls_pcie_drvdata ls2088_drvdata = {
+	.lut_offset = 0x80000,
+	.ltssm_shift = 0,
+	.lut_dbg = 0x407fc,
+	.ops = &ls_pcie_host_ops,
+	.dw_pcie_ops = &dw_ls_pcie_ops,
+};
+
 static const struct of_device_id ls_pcie_of_match[] = {
 	{ .compatible = "fsl,ls1021a-pcie", .data = &ls1021_drvdata },
 	{ .compatible = "fsl,ls1043a-pcie", .data = &ls1043_drvdata },
 	{ .compatible = "fsl,ls1046a-pcie", .data = &ls1046_drvdata },
 	{ .compatible = "fsl,ls2080a-pcie", .data = &ls2080_drvdata },
 	{ .compatible = "fsl,ls2085a-pcie", .data = &ls2080_drvdata },
+	{ .compatible = "fsl,ls2088a-pcie", .data = &ls2088_drvdata },
+	{ .compatible = "fsl,ls1088a-pcie", .data = &ls2088_drvdata },
 	{ },
 };
 
diff --git a/drivers/pci/dwc/pcie-armada8k.c b/drivers/pci/dwc/pcie-armada8k.c
index ea8f34a..370d057 100644
--- a/drivers/pci/dwc/pcie-armada8k.c
+++ b/drivers/pci/dwc/pcie-armada8k.c
@@ -134,13 +134,15 @@ static void armada8k_pcie_establish_link(struct armada8k_pcie *pcie)
 		dev_err(pci->dev, "Link not up after reconfiguration\n");
 }
 
-static void armada8k_pcie_host_init(struct pcie_port *pp)
+static int armada8k_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct armada8k_pcie *pcie = to_armada8k_pcie(pci);
 
 	dw_pcie_setup_rc(pp);
 	armada8k_pcie_establish_link(pcie);
+
+	return 0;
 }
 
 static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg)
@@ -176,9 +178,9 @@ static int armada8k_add_pcie_port(struct armada8k_pcie *pcie,
 	pp->ops = &armada8k_pcie_host_ops;
 
 	pp->irq = platform_get_irq(pdev, 0);
-	if (!pp->irq) {
+	if (pp->irq < 0) {
 		dev_err(dev, "failed to get irq for port\n");
-		return -ENODEV;
+		return pp->irq;
 	}
 
 	ret = devm_request_irq(dev, pp->irq, armada8k_pcie_irq_handler,
@@ -226,7 +228,9 @@ static int armada8k_pcie_probe(struct platform_device *pdev)
 	if (IS_ERR(pcie->clk))
 		return PTR_ERR(pcie->clk);
 
-	clk_prepare_enable(pcie->clk);
+	ret = clk_prepare_enable(pcie->clk);
+	if (ret)
+		return ret;
 
 	/* Get the dw-pcie unit configuration/control registers base. */
 	base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl");
diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c
index 01c6f78..6653619 100644
--- a/drivers/pci/dwc/pcie-artpec6.c
+++ b/drivers/pci/dwc/pcie-artpec6.c
@@ -141,12 +141,6 @@ static int artpec6_pcie_establish_link(struct artpec6_pcie *artpec6_pcie)
 	artpec6_pcie_writel(artpec6_pcie, PCIECFG, val);
 	usleep_range(100, 200);
 
-	/*
-	 * Enable writing to config regs. This is required as the Synopsys
-	 * driver changes the class code. That register needs DBI write enable.
-	 */
-	dw_pcie_writel_dbi(pci, MISC_CONTROL_1_OFF, DBI_RO_WR_EN);
-
 	/* setup root complex */
 	dw_pcie_setup_rc(pp);
 
@@ -175,13 +169,15 @@ static void artpec6_pcie_enable_interrupts(struct artpec6_pcie *artpec6_pcie)
 		dw_pcie_msi_init(pp);
 }
 
-static void artpec6_pcie_host_init(struct pcie_port *pp)
+static int artpec6_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci);
 
 	artpec6_pcie_establish_link(artpec6_pcie);
 	artpec6_pcie_enable_interrupts(artpec6_pcie);
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops artpec6_pcie_host_ops = {
@@ -207,9 +203,9 @@ static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie,
 
 	if (IS_ENABLED(CONFIG_PCI_MSI)) {
 		pp->msi_irq = platform_get_irq_byname(pdev, "msi");
-		if (pp->msi_irq <= 0) {
+		if (pp->msi_irq < 0) {
 			dev_err(dev, "failed to get MSI irq\n");
-			return -ENODEV;
+			return pp->msi_irq;
 		}
 
 		ret = devm_request_irq(dev, pp->msi_irq,
diff --git a/drivers/pci/dwc/pcie-designware-ep.c b/drivers/pci/dwc/pcie-designware-ep.c
index 3984063..d53d5f1 100644
--- a/drivers/pci/dwc/pcie-designware-ep.c
+++ b/drivers/pci/dwc/pcie-designware-ep.c
@@ -1,5 +1,5 @@
 /**
- * Synopsys Designware PCIe Endpoint controller driver
+ * Synopsys DesignWare PCIe Endpoint controller driver
  *
  * Copyright (C) 2017 Texas Instruments
  * Author: Kishon Vijay Abraham I <kishon@ti.com>
@@ -283,7 +283,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 {
 	int ret;
 	void *addr;
-	enum pci_barno bar;
 	struct pci_epc *epc;
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 	struct device *dev = pci->dev;
@@ -312,9 +311,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 		return -ENOMEM;
 	ep->outbound_addr = addr;
 
-	for (bar = BAR_0; bar <= BAR_5; bar++)
-		dw_pcie_ep_reset_bar(pci, bar);
-
 	if (ep->ops->ep_init)
 		ep->ops->ep_init(ep);
 
@@ -328,7 +324,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 	if (ret < 0)
 		epc->max_functions = 1;
 
-	ret = pci_epc_mem_init(epc, ep->phys_base, ep->addr_size);
+	ret = __pci_epc_mem_init(epc, ep->phys_base, ep->addr_size,
+				 ep->page_size);
 	if (ret < 0) {
 		dev_err(dev, "Failed to initialize address space\n");
 		return ret;
diff --git a/drivers/pci/dwc/pcie-designware-host.c b/drivers/pci/dwc/pcie-designware-host.c
index d29c020..81e2157 100644
--- a/drivers/pci/dwc/pcie-designware-host.c
+++ b/drivers/pci/dwc/pcie-designware-host.c
@@ -1,5 +1,5 @@
 /*
- * Synopsys Designware PCIe host controller driver
+ * Synopsys DesignWare PCIe host controller driver
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
@@ -71,9 +71,9 @@ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp)
 		while ((pos = find_next_bit((unsigned long *) &val, 32,
 					    pos)) != 32) {
 			irq = irq_find_mapping(pp->irq_domain, i * 32 + pos);
+			generic_handle_irq(irq);
 			dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + i * 12,
 					    4, 1 << pos);
-			generic_handle_irq(irq);
 			pos++;
 		}
 	}
@@ -401,8 +401,11 @@ int dw_pcie_host_init(struct pcie_port *pp)
 		}
 	}
 
-	if (pp->ops->host_init)
-		pp->ops->host_init(pp);
+	if (pp->ops->host_init) {
+		ret = pp->ops->host_init(pp);
+		if (ret)
+			goto error;
+	}
 
 	pp->root_bus_nr = pp->busn->start;
 
@@ -594,10 +597,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
 	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
 
 	/* setup interrupt pins */
+	dw_pcie_dbi_ro_wr_en(pci);
 	val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
 	val &= 0xffff00ff;
 	val |= 0x00000100;
 	dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
+	dw_pcie_dbi_ro_wr_dis(pci);
 
 	/* setup bus numbers */
 	val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
@@ -634,8 +639,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
 
 	dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0);
 
+	/* Enable write permission for the DBI read-only register */
+	dw_pcie_dbi_ro_wr_en(pci);
 	/* program correct class for RC */
 	dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI);
+	/* Better disable write permission right after the update */
+	dw_pcie_dbi_ro_wr_dis(pci);
 
 	dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val);
 	val |= PORT_LOGIC_SPEED_CHANGE;
diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c
index 091b4e7..168e238 100644
--- a/drivers/pci/dwc/pcie-designware-plat.c
+++ b/drivers/pci/dwc/pcie-designware-plat.c
@@ -35,7 +35,7 @@ static irqreturn_t dw_plat_pcie_msi_irq_handler(int irq, void *arg)
 	return dw_handle_msi_irq(pp);
 }
 
-static void dw_plat_pcie_host_init(struct pcie_port *pp)
+static int dw_plat_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 
@@ -44,6 +44,8 @@ static void dw_plat_pcie_host_init(struct pcie_port *pp)
 
 	if (IS_ENABLED(CONFIG_PCI_MSI))
 		dw_pcie_msi_init(pp);
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops dw_plat_pcie_host_ops = {
diff --git a/drivers/pci/dwc/pcie-designware.c b/drivers/pci/dwc/pcie-designware.c
index 0e03af2..88abddd 100644
--- a/drivers/pci/dwc/pcie-designware.c
+++ b/drivers/pci/dwc/pcie-designware.c
@@ -1,5 +1,5 @@
 /*
- * Synopsys Designware PCIe host controller driver
+ * Synopsys DesignWare PCIe host controller driver
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
@@ -107,8 +107,9 @@ static void dw_pcie_writel_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg,
 	dw_pcie_writel_dbi(pci, offset + reg, val);
 }
 
-void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index, int type,
-				      u64 cpu_addr, u64 pci_addr, u32 size)
+static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index,
+					     int type, u64 cpu_addr,
+					     u64 pci_addr, u32 size)
 {
 	u32 retries, val;
 
@@ -177,7 +178,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type,
 	 */
 	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
 		val = dw_pcie_readl_dbi(pci, PCIE_ATU_CR2);
-		if (val == PCIE_ATU_ENABLE)
+		if (val & PCIE_ATU_ENABLE)
 			return;
 
 		usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX);
@@ -200,8 +201,9 @@ static void dw_pcie_writel_ib_unroll(struct dw_pcie *pci, u32 index, u32 reg,
 	dw_pcie_writel_dbi(pci, offset + reg, val);
 }
 
-int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index, int bar,
-				    u64 cpu_addr, enum dw_pcie_as_type as_type)
+static int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index,
+					   int bar, u64 cpu_addr,
+					   enum dw_pcie_as_type as_type)
 {
 	int type;
 	u32 retries, val;
diff --git a/drivers/pci/dwc/pcie-designware.h b/drivers/pci/dwc/pcie-designware.h
index b4d2a89..e5d9d77 100644
--- a/drivers/pci/dwc/pcie-designware.h
+++ b/drivers/pci/dwc/pcie-designware.h
@@ -1,5 +1,5 @@
 /*
- * Synopsys Designware PCIe host controller driver
+ * Synopsys DesignWare PCIe host controller driver
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
@@ -76,6 +76,9 @@
 #define PCIE_ATU_FUNC(x)		(((x) & 0x7) << 16)
 #define PCIE_ATU_UPPER_TARGET		0x91C
 
+#define PCIE_MISC_CONTROL_1_OFF		0x8BC
+#define PCIE_DBI_RO_WR_EN		(0x1 << 0)
+
 /*
  * iATU Unroll-specific register definitions
  * From 4.80 core version the address translation will be made by unroll
@@ -134,7 +137,7 @@ struct dw_pcie_host_ops {
 			     unsigned int devfn, int where, int size, u32 *val);
 	int (*wr_other_conf)(struct pcie_port *pp, struct pci_bus *bus,
 			     unsigned int devfn, int where, int size, u32 val);
-	void (*host_init)(struct pcie_port *pp);
+	int (*host_init)(struct pcie_port *pp);
 	void (*msi_set_irq)(struct pcie_port *pp, int irq);
 	void (*msi_clear_irq)(struct pcie_port *pp, int irq);
 	phys_addr_t (*get_msi_addr)(struct pcie_port *pp);
@@ -186,6 +189,7 @@ struct dw_pcie_ep {
 	struct dw_pcie_ep_ops	*ops;
 	phys_addr_t		phys_base;
 	size_t			addr_size;
+	size_t			page_size;
 	u8			bar_to_atu[6];
 	phys_addr_t		*outbound_addr;
 	unsigned long		ib_window_map;
@@ -279,6 +283,28 @@ static inline u32 dw_pcie_readl_dbi2(struct dw_pcie *pci, u32 reg)
 	return __dw_pcie_read_dbi(pci, pci->dbi_base2, reg, 0x4);
 }
 
+static inline void dw_pcie_dbi_ro_wr_en(struct dw_pcie *pci)
+{
+	u32 reg;
+	u32 val;
+
+	reg = PCIE_MISC_CONTROL_1_OFF;
+	val = dw_pcie_readl_dbi(pci, reg);
+	val |= PCIE_DBI_RO_WR_EN;
+	dw_pcie_writel_dbi(pci, reg, val);
+}
+
+static inline void dw_pcie_dbi_ro_wr_dis(struct dw_pcie *pci)
+{
+	u32 reg;
+	u32 val;
+
+	reg = PCIE_MISC_CONTROL_1_OFF;
+	val = dw_pcie_readl_dbi(pci, reg);
+	val &= ~PCIE_DBI_RO_WR_EN;
+	dw_pcie_writel_dbi(pci, reg, val);
+}
+
 #ifdef CONFIG_PCIE_DW_HOST
 irqreturn_t dw_handle_msi_irq(struct pcie_port *pp);
 void dw_pcie_msi_init(struct pcie_port *pp);
diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c
index e51acee..a201791 100644
--- a/drivers/pci/dwc/pcie-hisi.c
+++ b/drivers/pci/dwc/pcie-hisi.c
@@ -223,7 +223,7 @@ static int hisi_pcie_link_up(struct dw_pcie *pci)
 	return hisi_pcie->soc_ops->hisi_pcie_link_up(hisi_pcie);
 }
 
-static struct dw_pcie_host_ops hisi_pcie_host_ops = {
+static const struct dw_pcie_host_ops hisi_pcie_host_ops = {
 	.rd_own_conf = hisi_pcie_cfg_read,
 	.wr_own_conf = hisi_pcie_cfg_write,
 };
@@ -268,7 +268,6 @@ static int hisi_pcie_probe(struct platform_device *pdev)
 	struct dw_pcie *pci;
 	struct hisi_pcie *hisi_pcie;
 	struct resource *reg;
-	struct device_driver *driver;
 	int ret;
 
 	hisi_pcie = devm_kzalloc(dev, sizeof(*hisi_pcie), GFP_KERNEL);
@@ -282,8 +281,6 @@ static int hisi_pcie_probe(struct platform_device *pdev)
 	pci->dev = dev;
 	pci->ops = &dw_pcie_ops;
 
-	driver = dev->driver;
-
 	hisi_pcie->pci = pci;
 
 	hisi_pcie->soc_ops = of_device_get_match_data(dev);
diff --git a/drivers/pci/dwc/pcie-kirin.c b/drivers/pci/dwc/pcie-kirin.c
index 33fddb9..dc3033c 100644
--- a/drivers/pci/dwc/pcie-kirin.c
+++ b/drivers/pci/dwc/pcie-kirin.c
@@ -430,9 +430,11 @@ static int kirin_pcie_establish_link(struct pcie_port *pp)
 	return 0;
 }
 
-static void kirin_pcie_host_init(struct pcie_port *pp)
+static int kirin_pcie_host_init(struct pcie_port *pp)
 {
 	kirin_pcie_establish_link(pp);
+
+	return 0;
 }
 
 static struct dw_pcie_ops kirin_dw_pcie_ops = {
@@ -441,7 +443,7 @@ static struct dw_pcie_ops kirin_dw_pcie_ops = {
 	.link_up = kirin_pcie_link_up,
 };
 
-static struct dw_pcie_host_ops kirin_pcie_host_ops = {
+static const struct dw_pcie_host_ops kirin_pcie_host_ops = {
 	.rd_own_conf = kirin_pcie_rd_own_conf,
 	.wr_own_conf = kirin_pcie_wr_own_conf,
 	.host_init = kirin_pcie_host_init,
diff --git a/drivers/pci/dwc/pcie-qcom.c b/drivers/pci/dwc/pcie-qcom.c
index 68c5f2a..ce7ba5b 100644
--- a/drivers/pci/dwc/pcie-qcom.c
+++ b/drivers/pci/dwc/pcie-qcom.c
@@ -37,6 +37,20 @@
 #include "pcie-designware.h"
 
 #define PCIE20_PARF_SYS_CTRL			0x00
+#define MST_WAKEUP_EN				BIT(13)
+#define SLV_WAKEUP_EN				BIT(12)
+#define MSTR_ACLK_CGC_DIS			BIT(10)
+#define SLV_ACLK_CGC_DIS			BIT(9)
+#define CORE_CLK_CGC_DIS			BIT(6)
+#define AUX_PWR_DET				BIT(4)
+#define L23_CLK_RMV_DIS				BIT(2)
+#define L1_CLK_RMV_DIS				BIT(1)
+
+#define PCIE20_COMMAND_STATUS			0x04
+#define CMD_BME_VAL				0x4
+#define PCIE20_DEVICE_CONTROL2_STATUS2		0x98
+#define PCIE_CAP_CPL_TIMEOUT_DISABLE		0x10
+
 #define PCIE20_PARF_PHY_CTRL			0x40
 #define PCIE20_PARF_PHY_REFCLK			0x4C
 #define PCIE20_PARF_DBI_BASE_ADDR		0x168
@@ -58,10 +72,22 @@
 #define CFG_BRIDGE_SB_INIT			BIT(0)
 
 #define PCIE20_CAP				0x70
+#define PCIE20_CAP_LINK_CAPABILITIES		(PCIE20_CAP + 0xC)
+#define PCIE20_CAP_ACTIVE_STATE_LINK_PM_SUPPORT	(BIT(10) | BIT(11))
+#define PCIE20_CAP_LINK_1			(PCIE20_CAP + 0x14)
+#define PCIE_CAP_LINK1_VAL			0x2FD7F
+
+#define PCIE20_PARF_Q2A_FLUSH			0x1AC
+
+#define PCIE20_MISC_CONTROL_1_REG		0x8BC
+#define DBI_RO_WR_EN				1
 
 #define PERST_DELAY_US				1000
 
-struct qcom_pcie_resources_v0 {
+#define PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE	0x358
+#define SLV_ADDR_SPACE_SZ			0x10000000
+
+struct qcom_pcie_resources_2_1_0 {
 	struct clk *iface_clk;
 	struct clk *core_clk;
 	struct clk *phy_clk;
@@ -75,7 +101,7 @@ struct qcom_pcie_resources_v0 {
 	struct regulator *vdda_refclk;
 };
 
-struct qcom_pcie_resources_v1 {
+struct qcom_pcie_resources_1_0_0 {
 	struct clk *iface;
 	struct clk *aux;
 	struct clk *master_bus;
@@ -84,7 +110,7 @@ struct qcom_pcie_resources_v1 {
 	struct regulator *vdda;
 };
 
-struct qcom_pcie_resources_v2 {
+struct qcom_pcie_resources_2_3_2 {
 	struct clk *aux_clk;
 	struct clk *master_clk;
 	struct clk *slave_clk;
@@ -92,7 +118,7 @@ struct qcom_pcie_resources_v2 {
 	struct clk *pipe_clk;
 };
 
-struct qcom_pcie_resources_v3 {
+struct qcom_pcie_resources_2_4_0 {
 	struct clk *aux_clk;
 	struct clk *master_clk;
 	struct clk *slave_clk;
@@ -110,11 +136,21 @@ struct qcom_pcie_resources_v3 {
 	struct reset_control *phy_ahb_reset;
 };
 
+struct qcom_pcie_resources_2_3_3 {
+	struct clk *iface;
+	struct clk *axi_m_clk;
+	struct clk *axi_s_clk;
+	struct clk *ahb_clk;
+	struct clk *aux_clk;
+	struct reset_control *rst[7];
+};
+
 union qcom_pcie_resources {
-	struct qcom_pcie_resources_v0 v0;
-	struct qcom_pcie_resources_v1 v1;
-	struct qcom_pcie_resources_v2 v2;
-	struct qcom_pcie_resources_v3 v3;
+	struct qcom_pcie_resources_1_0_0 v1_0_0;
+	struct qcom_pcie_resources_2_1_0 v2_1_0;
+	struct qcom_pcie_resources_2_3_2 v2_3_2;
+	struct qcom_pcie_resources_2_3_3 v2_3_3;
+	struct qcom_pcie_resources_2_4_0 v2_4_0;
 };
 
 struct qcom_pcie;
@@ -124,6 +160,7 @@ struct qcom_pcie_ops {
 	int (*init)(struct qcom_pcie *pcie);
 	int (*post_init)(struct qcom_pcie *pcie);
 	void (*deinit)(struct qcom_pcie *pcie);
+	void (*post_deinit)(struct qcom_pcie *pcie);
 	void (*ltssm_enable)(struct qcom_pcie *pcie);
 };
 
@@ -141,13 +178,13 @@ struct qcom_pcie {
 
 static void qcom_ep_reset_assert(struct qcom_pcie *pcie)
 {
-	gpiod_set_value(pcie->reset, 1);
+	gpiod_set_value_cansleep(pcie->reset, 1);
 	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
 }
 
 static void qcom_ep_reset_deassert(struct qcom_pcie *pcie)
 {
-	gpiod_set_value(pcie->reset, 0);
+	gpiod_set_value_cansleep(pcie->reset, 0);
 	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
 }
 
@@ -172,7 +209,7 @@ static int qcom_pcie_establish_link(struct qcom_pcie *pcie)
 	return dw_pcie_wait_for_link(pci);
 }
 
-static void qcom_pcie_v0_v1_ltssm_enable(struct qcom_pcie *pcie)
+static void qcom_pcie_2_1_0_ltssm_enable(struct qcom_pcie *pcie)
 {
 	u32 val;
 
@@ -182,9 +219,9 @@ static void qcom_pcie_v0_v1_ltssm_enable(struct qcom_pcie *pcie)
 	writel(val, pcie->elbi + PCIE20_ELBI_SYS_CTRL);
 }
 
-static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie)
+static int qcom_pcie_get_resources_2_1_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v0 *res = &pcie->res.v0;
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 
@@ -212,29 +249,29 @@ static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie)
 	if (IS_ERR(res->phy_clk))
 		return PTR_ERR(res->phy_clk);
 
-	res->pci_reset = devm_reset_control_get(dev, "pci");
+	res->pci_reset = devm_reset_control_get_exclusive(dev, "pci");
 	if (IS_ERR(res->pci_reset))
 		return PTR_ERR(res->pci_reset);
 
-	res->axi_reset = devm_reset_control_get(dev, "axi");
+	res->axi_reset = devm_reset_control_get_exclusive(dev, "axi");
 	if (IS_ERR(res->axi_reset))
 		return PTR_ERR(res->axi_reset);
 
-	res->ahb_reset = devm_reset_control_get(dev, "ahb");
+	res->ahb_reset = devm_reset_control_get_exclusive(dev, "ahb");
 	if (IS_ERR(res->ahb_reset))
 		return PTR_ERR(res->ahb_reset);
 
-	res->por_reset = devm_reset_control_get(dev, "por");
+	res->por_reset = devm_reset_control_get_exclusive(dev, "por");
 	if (IS_ERR(res->por_reset))
 		return PTR_ERR(res->por_reset);
 
-	res->phy_reset = devm_reset_control_get(dev, "phy");
+	res->phy_reset = devm_reset_control_get_exclusive(dev, "phy");
 	return PTR_ERR_OR_ZERO(res->phy_reset);
 }
 
-static void qcom_pcie_deinit_v0(struct qcom_pcie *pcie)
+static void qcom_pcie_deinit_2_1_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v0 *res = &pcie->res.v0;
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
 
 	reset_control_assert(res->pci_reset);
 	reset_control_assert(res->axi_reset);
@@ -249,9 +286,9 @@ static void qcom_pcie_deinit_v0(struct qcom_pcie *pcie)
 	regulator_disable(res->vdda_refclk);
 }
 
-static int qcom_pcie_init_v0(struct qcom_pcie *pcie)
+static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v0 *res = &pcie->res.v0;
+	struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 	u32 val;
@@ -367,9 +404,9 @@ static int qcom_pcie_init_v0(struct qcom_pcie *pcie)
 	return ret;
 }
 
-static int qcom_pcie_get_resources_v1(struct qcom_pcie *pcie)
+static int qcom_pcie_get_resources_1_0_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v1 *res = &pcie->res.v1;
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 
@@ -393,13 +430,13 @@ static int qcom_pcie_get_resources_v1(struct qcom_pcie *pcie)
 	if (IS_ERR(res->slave_bus))
 		return PTR_ERR(res->slave_bus);
 
-	res->core = devm_reset_control_get(dev, "core");
+	res->core = devm_reset_control_get_exclusive(dev, "core");
 	return PTR_ERR_OR_ZERO(res->core);
 }
 
-static void qcom_pcie_deinit_v1(struct qcom_pcie *pcie)
+static void qcom_pcie_deinit_1_0_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v1 *res = &pcie->res.v1;
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
 
 	reset_control_assert(res->core);
 	clk_disable_unprepare(res->slave_bus);
@@ -409,9 +446,9 @@ static void qcom_pcie_deinit_v1(struct qcom_pcie *pcie)
 	regulator_disable(res->vdda);
 }
 
-static int qcom_pcie_init_v1(struct qcom_pcie *pcie)
+static int qcom_pcie_init_1_0_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v1 *res = &pcie->res.v1;
+	struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 	int ret;
@@ -477,7 +514,7 @@ static int qcom_pcie_init_v1(struct qcom_pcie *pcie)
 	return ret;
 }
 
-static void qcom_pcie_v2_ltssm_enable(struct qcom_pcie *pcie)
+static void qcom_pcie_2_3_2_ltssm_enable(struct qcom_pcie *pcie)
 {
 	u32 val;
 
@@ -487,9 +524,9 @@ static void qcom_pcie_v2_ltssm_enable(struct qcom_pcie *pcie)
 	writel(val, pcie->parf + PCIE20_PARF_LTSSM);
 }
 
-static int qcom_pcie_get_resources_v2(struct qcom_pcie *pcie)
+static int qcom_pcie_get_resources_2_3_2(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v2 *res = &pcie->res.v2;
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 
@@ -513,20 +550,26 @@ static int qcom_pcie_get_resources_v2(struct qcom_pcie *pcie)
 	return PTR_ERR_OR_ZERO(res->pipe_clk);
 }
 
-static void qcom_pcie_deinit_v2(struct qcom_pcie *pcie)
+static void qcom_pcie_deinit_2_3_2(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v2 *res = &pcie->res.v2;
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
 
-	clk_disable_unprepare(res->pipe_clk);
 	clk_disable_unprepare(res->slave_clk);
 	clk_disable_unprepare(res->master_clk);
 	clk_disable_unprepare(res->cfg_clk);
 	clk_disable_unprepare(res->aux_clk);
 }
 
-static int qcom_pcie_init_v2(struct qcom_pcie *pcie)
+static void qcom_pcie_post_deinit_2_3_2(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v2 *res = &pcie->res.v2;
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
+
+	clk_disable_unprepare(res->pipe_clk);
+}
+
+static int qcom_pcie_init_2_3_2(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 	u32 val;
@@ -589,9 +632,9 @@ static int qcom_pcie_init_v2(struct qcom_pcie *pcie)
 	return ret;
 }
 
-static int qcom_pcie_post_init_v2(struct qcom_pcie *pcie)
+static int qcom_pcie_post_init_2_3_2(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v2 *res = &pcie->res.v2;
+	struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 	int ret;
@@ -605,9 +648,9 @@ static int qcom_pcie_post_init_v2(struct qcom_pcie *pcie)
 	return 0;
 }
 
-static int qcom_pcie_get_resources_v3(struct qcom_pcie *pcie)
+static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v3 *res = &pcie->res.v3;
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 
@@ -623,60 +666,64 @@ static int qcom_pcie_get_resources_v3(struct qcom_pcie *pcie)
 	if (IS_ERR(res->slave_clk))
 		return PTR_ERR(res->slave_clk);
 
-	res->axi_m_reset = devm_reset_control_get(dev, "axi_m");
+	res->axi_m_reset = devm_reset_control_get_exclusive(dev, "axi_m");
 	if (IS_ERR(res->axi_m_reset))
 		return PTR_ERR(res->axi_m_reset);
 
-	res->axi_s_reset = devm_reset_control_get(dev, "axi_s");
+	res->axi_s_reset = devm_reset_control_get_exclusive(dev, "axi_s");
 	if (IS_ERR(res->axi_s_reset))
 		return PTR_ERR(res->axi_s_reset);
 
-	res->pipe_reset = devm_reset_control_get(dev, "pipe");
+	res->pipe_reset = devm_reset_control_get_exclusive(dev, "pipe");
 	if (IS_ERR(res->pipe_reset))
 		return PTR_ERR(res->pipe_reset);
 
-	res->axi_m_vmid_reset = devm_reset_control_get(dev, "axi_m_vmid");
+	res->axi_m_vmid_reset = devm_reset_control_get_exclusive(dev,
+								 "axi_m_vmid");
 	if (IS_ERR(res->axi_m_vmid_reset))
 		return PTR_ERR(res->axi_m_vmid_reset);
 
-	res->axi_s_xpu_reset = devm_reset_control_get(dev, "axi_s_xpu");
+	res->axi_s_xpu_reset = devm_reset_control_get_exclusive(dev,
+								"axi_s_xpu");
 	if (IS_ERR(res->axi_s_xpu_reset))
 		return PTR_ERR(res->axi_s_xpu_reset);
 
-	res->parf_reset = devm_reset_control_get(dev, "parf");
+	res->parf_reset = devm_reset_control_get_exclusive(dev, "parf");
 	if (IS_ERR(res->parf_reset))
 		return PTR_ERR(res->parf_reset);
 
-	res->phy_reset = devm_reset_control_get(dev, "phy");
+	res->phy_reset = devm_reset_control_get_exclusive(dev, "phy");
 	if (IS_ERR(res->phy_reset))
 		return PTR_ERR(res->phy_reset);
 
-	res->axi_m_sticky_reset = devm_reset_control_get(dev, "axi_m_sticky");
+	res->axi_m_sticky_reset = devm_reset_control_get_exclusive(dev,
+								   "axi_m_sticky");
 	if (IS_ERR(res->axi_m_sticky_reset))
 		return PTR_ERR(res->axi_m_sticky_reset);
 
-	res->pipe_sticky_reset = devm_reset_control_get(dev, "pipe_sticky");
+	res->pipe_sticky_reset = devm_reset_control_get_exclusive(dev,
+								  "pipe_sticky");
 	if (IS_ERR(res->pipe_sticky_reset))
 		return PTR_ERR(res->pipe_sticky_reset);
 
-	res->pwr_reset = devm_reset_control_get(dev, "pwr");
+	res->pwr_reset = devm_reset_control_get_exclusive(dev, "pwr");
 	if (IS_ERR(res->pwr_reset))
 		return PTR_ERR(res->pwr_reset);
 
-	res->ahb_reset = devm_reset_control_get(dev, "ahb");
+	res->ahb_reset = devm_reset_control_get_exclusive(dev, "ahb");
 	if (IS_ERR(res->ahb_reset))
 		return PTR_ERR(res->ahb_reset);
 
-	res->phy_ahb_reset = devm_reset_control_get(dev, "phy_ahb");
+	res->phy_ahb_reset = devm_reset_control_get_exclusive(dev, "phy_ahb");
 	if (IS_ERR(res->phy_ahb_reset))
 		return PTR_ERR(res->phy_ahb_reset);
 
 	return 0;
 }
 
-static void qcom_pcie_deinit_v3(struct qcom_pcie *pcie)
+static void qcom_pcie_deinit_2_4_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v3 *res = &pcie->res.v3;
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
 
 	reset_control_assert(res->axi_m_reset);
 	reset_control_assert(res->axi_s_reset);
@@ -692,9 +739,9 @@ static void qcom_pcie_deinit_v3(struct qcom_pcie *pcie)
 	clk_disable_unprepare(res->slave_clk);
 }
 
-static int qcom_pcie_init_v3(struct qcom_pcie *pcie)
+static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie)
 {
-	struct qcom_pcie_resources_v3 *res = &pcie->res.v3;
+	struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 	u32 val;
@@ -884,6 +931,166 @@ static int qcom_pcie_init_v3(struct qcom_pcie *pcie)
 	return ret;
 }
 
+static int qcom_pcie_get_resources_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int i;
+	const char *rst_names[] = { "axi_m", "axi_s", "pipe",
+				    "axi_m_sticky", "sticky",
+				    "ahb", "sleep", };
+
+	res->iface = devm_clk_get(dev, "iface");
+	if (IS_ERR(res->iface))
+		return PTR_ERR(res->iface);
+
+	res->axi_m_clk = devm_clk_get(dev, "axi_m");
+	if (IS_ERR(res->axi_m_clk))
+		return PTR_ERR(res->axi_m_clk);
+
+	res->axi_s_clk = devm_clk_get(dev, "axi_s");
+	if (IS_ERR(res->axi_s_clk))
+		return PTR_ERR(res->axi_s_clk);
+
+	res->ahb_clk = devm_clk_get(dev, "ahb");
+	if (IS_ERR(res->ahb_clk))
+		return PTR_ERR(res->ahb_clk);
+
+	res->aux_clk = devm_clk_get(dev, "aux");
+	if (IS_ERR(res->aux_clk))
+		return PTR_ERR(res->aux_clk);
+
+	for (i = 0; i < ARRAY_SIZE(rst_names); i++) {
+		res->rst[i] = devm_reset_control_get(dev, rst_names[i]);
+		if (IS_ERR(res->rst[i]))
+			return PTR_ERR(res->rst[i]);
+	}
+
+	return 0;
+}
+
+static void qcom_pcie_deinit_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+
+	clk_disable_unprepare(res->iface);
+	clk_disable_unprepare(res->axi_m_clk);
+	clk_disable_unprepare(res->axi_s_clk);
+	clk_disable_unprepare(res->ahb_clk);
+	clk_disable_unprepare(res->aux_clk);
+}
+
+static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+{
+	struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+	struct dw_pcie *pci = pcie->pci;
+	struct device *dev = pci->dev;
+	int i, ret;
+	u32 val;
+
+	for (i = 0; i < ARRAY_SIZE(res->rst); i++) {
+		ret = reset_control_assert(res->rst[i]);
+		if (ret) {
+			dev_err(dev, "reset #%d assert failed (%d)\n", i, ret);
+			return ret;
+		}
+	}
+
+	usleep_range(2000, 2500);
+
+	for (i = 0; i < ARRAY_SIZE(res->rst); i++) {
+		ret = reset_control_deassert(res->rst[i]);
+		if (ret) {
+			dev_err(dev, "reset #%d deassert failed (%d)\n", i,
+				ret);
+			return ret;
+		}
+	}
+
+	/*
+	 * Don't have a way to see if the reset has completed.
+	 * Wait for some time.
+	 */
+	usleep_range(2000, 2500);
+
+	ret = clk_prepare_enable(res->iface);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable core clock\n");
+		goto err_clk_iface;
+	}
+
+	ret = clk_prepare_enable(res->axi_m_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable core clock\n");
+		goto err_clk_axi_m;
+	}
+
+	ret = clk_prepare_enable(res->axi_s_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable axi slave clock\n");
+		goto err_clk_axi_s;
+	}
+
+	ret = clk_prepare_enable(res->ahb_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable ahb clock\n");
+		goto err_clk_ahb;
+	}
+
+	ret = clk_prepare_enable(res->aux_clk);
+	if (ret) {
+		dev_err(dev, "cannot prepare/enable aux clock\n");
+		goto err_clk_aux;
+	}
+
+	writel(SLV_ADDR_SPACE_SZ,
+		pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE);
+
+	val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL);
+	val &= ~BIT(0);
+	writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL);
+
+	writel(0, pcie->parf + PCIE20_PARF_DBI_BASE_ADDR);
+
+	writel(MST_WAKEUP_EN | SLV_WAKEUP_EN | MSTR_ACLK_CGC_DIS
+		| SLV_ACLK_CGC_DIS | CORE_CLK_CGC_DIS |
+		AUX_PWR_DET | L23_CLK_RMV_DIS | L1_CLK_RMV_DIS,
+		pcie->parf + PCIE20_PARF_SYS_CTRL);
+	writel(0, pcie->parf + PCIE20_PARF_Q2A_FLUSH);
+
+	writel(CMD_BME_VAL, pci->dbi_base + PCIE20_COMMAND_STATUS);
+	writel(DBI_RO_WR_EN, pci->dbi_base + PCIE20_MISC_CONTROL_1_REG);
+	writel(PCIE_CAP_LINK1_VAL, pci->dbi_base + PCIE20_CAP_LINK_1);
+
+	val = readl(pci->dbi_base + PCIE20_CAP_LINK_CAPABILITIES);
+	val &= ~PCIE20_CAP_ACTIVE_STATE_LINK_PM_SUPPORT;
+	writel(val, pci->dbi_base + PCIE20_CAP_LINK_CAPABILITIES);
+
+	writel(PCIE_CAP_CPL_TIMEOUT_DISABLE, pci->dbi_base +
+		PCIE20_DEVICE_CONTROL2_STATUS2);
+
+	return 0;
+
+err_clk_aux:
+	clk_disable_unprepare(res->ahb_clk);
+err_clk_ahb:
+	clk_disable_unprepare(res->axi_s_clk);
+err_clk_axi_s:
+	clk_disable_unprepare(res->axi_m_clk);
+err_clk_axi_m:
+	clk_disable_unprepare(res->iface);
+err_clk_iface:
+	/*
+	 * Not checking for failure, will anyway return
+	 * the original failure in 'ret'.
+	 */
+	for (i = 0; i < ARRAY_SIZE(res->rst); i++)
+		reset_control_assert(res->rst[i]);
+
+	return ret;
+}
+
 static int qcom_pcie_link_up(struct dw_pcie *pci)
 {
 	u16 val = readw(pci->dbi_base + PCIE20_CAP + PCI_EXP_LNKSTA);
@@ -891,7 +1098,7 @@ static int qcom_pcie_link_up(struct dw_pcie *pci)
 	return !!(val & PCI_EXP_LNKSTA_DLLLA);
 }
 
-static void qcom_pcie_host_init(struct pcie_port *pp)
+static int qcom_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct qcom_pcie *pcie = to_qcom_pcie(pci);
@@ -901,14 +1108,17 @@ static void qcom_pcie_host_init(struct pcie_port *pp)
 
 	ret = pcie->ops->init(pcie);
 	if (ret)
-		goto err_deinit;
+		return ret;
 
 	ret = phy_power_on(pcie->phy);
 	if (ret)
 		goto err_deinit;
 
-	if (pcie->ops->post_init)
-		pcie->ops->post_init(pcie);
+	if (pcie->ops->post_init) {
+		ret = pcie->ops->post_init(pcie);
+		if (ret)
+			goto err_disable_phy;
+	}
 
 	dw_pcie_setup_rc(pp);
 
@@ -921,12 +1131,17 @@ static void qcom_pcie_host_init(struct pcie_port *pp)
 	if (ret)
 		goto err;
 
-	return;
+	return 0;
 err:
 	qcom_ep_reset_assert(pcie);
+	if (pcie->ops->post_deinit)
+		pcie->ops->post_deinit(pcie);
+err_disable_phy:
 	phy_power_off(pcie->phy);
 err_deinit:
 	pcie->ops->deinit(pcie);
+
+	return ret;
 }
 
 static int qcom_pcie_rd_own_conf(struct pcie_port *pp, int where, int size,
@@ -950,39 +1165,52 @@ static const struct dw_pcie_host_ops qcom_pcie_dw_ops = {
 	.rd_own_conf = qcom_pcie_rd_own_conf,
 };
 
-static const struct qcom_pcie_ops ops_v0 = {
-	.get_resources = qcom_pcie_get_resources_v0,
-	.init = qcom_pcie_init_v0,
-	.deinit = qcom_pcie_deinit_v0,
-	.ltssm_enable = qcom_pcie_v0_v1_ltssm_enable,
+/* Qcom IP rev.: 2.1.0	Synopsys IP rev.: 4.01a */
+static const struct qcom_pcie_ops ops_2_1_0 = {
+	.get_resources = qcom_pcie_get_resources_2_1_0,
+	.init = qcom_pcie_init_2_1_0,
+	.deinit = qcom_pcie_deinit_2_1_0,
+	.ltssm_enable = qcom_pcie_2_1_0_ltssm_enable,
 };
 
-static const struct qcom_pcie_ops ops_v1 = {
-	.get_resources = qcom_pcie_get_resources_v1,
-	.init = qcom_pcie_init_v1,
-	.deinit = qcom_pcie_deinit_v1,
-	.ltssm_enable = qcom_pcie_v0_v1_ltssm_enable,
+/* Qcom IP rev.: 1.0.0	Synopsys IP rev.: 4.11a */
+static const struct qcom_pcie_ops ops_1_0_0 = {
+	.get_resources = qcom_pcie_get_resources_1_0_0,
+	.init = qcom_pcie_init_1_0_0,
+	.deinit = qcom_pcie_deinit_1_0_0,
+	.ltssm_enable = qcom_pcie_2_1_0_ltssm_enable,
 };
 
-static const struct qcom_pcie_ops ops_v2 = {
-	.get_resources = qcom_pcie_get_resources_v2,
-	.init = qcom_pcie_init_v2,
-	.post_init = qcom_pcie_post_init_v2,
-	.deinit = qcom_pcie_deinit_v2,
-	.ltssm_enable = qcom_pcie_v2_ltssm_enable,
+/* Qcom IP rev.: 2.3.2	Synopsys IP rev.: 4.21a */
+static const struct qcom_pcie_ops ops_2_3_2 = {
+	.get_resources = qcom_pcie_get_resources_2_3_2,
+	.init = qcom_pcie_init_2_3_2,
+	.post_init = qcom_pcie_post_init_2_3_2,
+	.deinit = qcom_pcie_deinit_2_3_2,
+	.post_deinit = qcom_pcie_post_deinit_2_3_2,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.4.0	Synopsys IP rev.: 4.20a */
+static const struct qcom_pcie_ops ops_2_4_0 = {
+	.get_resources = qcom_pcie_get_resources_2_4_0,
+	.init = qcom_pcie_init_2_4_0,
+	.deinit = qcom_pcie_deinit_2_4_0,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+};
+
+/* Qcom IP rev.: 2.3.3	Synopsys IP rev.: 4.30a */
+static const struct qcom_pcie_ops ops_2_3_3 = {
+	.get_resources = qcom_pcie_get_resources_2_3_3,
+	.init = qcom_pcie_init_2_3_3,
+	.deinit = qcom_pcie_deinit_2_3_3,
+	.ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
 };
 
 static const struct dw_pcie_ops dw_pcie_ops = {
 	.link_up = qcom_pcie_link_up,
 };
 
-static const struct qcom_pcie_ops ops_v3 = {
-	.get_resources = qcom_pcie_get_resources_v3,
-	.init = qcom_pcie_init_v3,
-	.deinit = qcom_pcie_deinit_v3,
-	.ltssm_enable = qcom_pcie_v2_ltssm_enable,
-};
-
 static int qcom_pcie_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1069,11 +1297,12 @@ static int qcom_pcie_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id qcom_pcie_match[] = {
-	{ .compatible = "qcom,pcie-ipq8064", .data = &ops_v0 },
-	{ .compatible = "qcom,pcie-apq8064", .data = &ops_v0 },
-	{ .compatible = "qcom,pcie-apq8084", .data = &ops_v1 },
-	{ .compatible = "qcom,pcie-msm8996", .data = &ops_v2 },
-	{ .compatible = "qcom,pcie-ipq4019", .data = &ops_v3 },
+	{ .compatible = "qcom,pcie-apq8084", .data = &ops_1_0_0 },
+	{ .compatible = "qcom,pcie-ipq8064", .data = &ops_2_1_0 },
+	{ .compatible = "qcom,pcie-apq8064", .data = &ops_2_1_0 },
+	{ .compatible = "qcom,pcie-msm8996", .data = &ops_2_3_2 },
+	{ .compatible = "qcom,pcie-ipq8074", .data = &ops_2_3_3 },
+	{ .compatible = "qcom,pcie-ipq4019", .data = &ops_2_4_0 },
 	{ }
 };
 
diff --git a/drivers/pci/dwc/pcie-spear13xx.c b/drivers/pci/dwc/pcie-spear13xx.c
index 8089729..709189d 100644
--- a/drivers/pci/dwc/pcie-spear13xx.c
+++ b/drivers/pci/dwc/pcie-spear13xx.c
@@ -177,13 +177,15 @@ static int spear13xx_pcie_link_up(struct dw_pcie *pci)
 	return 0;
 }
 
-static void spear13xx_pcie_host_init(struct pcie_port *pp)
+static int spear13xx_pcie_host_init(struct pcie_port *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci);
 
 	spear13xx_pcie_establish_link(spear13xx_pcie);
 	spear13xx_pcie_enable_interrupts(spear13xx_pcie);
+
+	return 0;
 }
 
 static const struct dw_pcie_host_ops spear13xx_pcie_host_ops = {
@@ -199,9 +201,9 @@ static int spear13xx_add_pcie_port(struct spear13xx_pcie *spear13xx_pcie,
 	int ret;
 
 	pp->irq = platform_get_irq(pdev, 0);
-	if (!pp->irq) {
+	if (pp->irq < 0) {
 		dev_err(dev, "failed to get irq\n");
-		return -ENODEV;
+		return pp->irq;
 	}
 	ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler,
 			       IRQF_SHARED | IRQF_NO_THREAD,
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 53fff80..4ddc6e8 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -54,6 +54,8 @@ static struct workqueue_struct *kpcitest_workqueue;
 struct pci_epf_test {
 	void			*reg[6];
 	struct pci_epf		*epf;
+	enum pci_barno		test_reg_bar;
+	bool			linkup_notifier;
 	struct delayed_work	cmd_handler;
 };
 
@@ -74,7 +76,12 @@ static struct pci_epf_header test_header = {
 	.interrupt_pin	= PCI_INTERRUPT_INTA,
 };
 
-static int bar_size[] = { 512, 1024, 16384, 131072, 1048576 };
+struct pci_epf_test_data {
+	enum pci_barno	test_reg_bar;
+	bool		linkup_notifier;
+};
+
+static int bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 };
 
 static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 {
@@ -86,7 +93,8 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 	struct pci_epf *epf = epf_test->epf;
 	struct device *dev = &epf->dev;
 	struct pci_epc *epc = epf->epc;
-	struct pci_epf_test_reg *reg = epf_test->reg[0];
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
 
 	src_addr = pci_epc_mem_alloc_addr(epc, &src_phys_addr, reg->size);
 	if (!src_addr) {
@@ -145,7 +153,8 @@ static int pci_epf_test_read(struct pci_epf_test *epf_test)
 	struct pci_epf *epf = epf_test->epf;
 	struct device *dev = &epf->dev;
 	struct pci_epc *epc = epf->epc;
-	struct pci_epf_test_reg *reg = epf_test->reg[0];
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
 
 	src_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size);
 	if (!src_addr) {
@@ -195,7 +204,8 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test)
 	struct pci_epf *epf = epf_test->epf;
 	struct device *dev = &epf->dev;
 	struct pci_epc *epc = epf->epc;
-	struct pci_epf_test_reg *reg = epf_test->reg[0];
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
 
 	dst_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size);
 	if (!dst_addr) {
@@ -247,7 +257,8 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
 	u8 msi_count;
 	struct pci_epf *epf = epf_test->epf;
 	struct pci_epc *epc = epf->epc;
-	struct pci_epf_test_reg *reg = epf_test->reg[0];
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
 
 	reg->status |= STATUS_IRQ_RAISED;
 	msi_count = pci_epc_get_msi(epc);
@@ -263,22 +274,28 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	int ret;
 	u8 irq;
 	u8 msi_count;
+	u32 command;
 	struct pci_epf_test *epf_test = container_of(work, struct pci_epf_test,
 						     cmd_handler.work);
 	struct pci_epf *epf = epf_test->epf;
 	struct pci_epc *epc = epf->epc;
-	struct pci_epf_test_reg *reg = epf_test->reg[0];
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
+	struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar];
 
-	if (!reg->command)
+	command = reg->command;
+	if (!command)
 		goto reset_handler;
 
-	if (reg->command & COMMAND_RAISE_LEGACY_IRQ) {
+	reg->command = 0;
+	reg->status = 0;
+
+	if (command & COMMAND_RAISE_LEGACY_IRQ) {
 		reg->status = STATUS_IRQ_RAISED;
 		pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
 		goto reset_handler;
 	}
 
-	if (reg->command & COMMAND_WRITE) {
+	if (command & COMMAND_WRITE) {
 		ret = pci_epf_test_write(epf_test);
 		if (ret)
 			reg->status |= STATUS_WRITE_FAIL;
@@ -288,7 +305,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 		goto reset_handler;
 	}
 
-	if (reg->command & COMMAND_READ) {
+	if (command & COMMAND_READ) {
 		ret = pci_epf_test_read(epf_test);
 		if (!ret)
 			reg->status |= STATUS_READ_SUCCESS;
@@ -298,7 +315,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 		goto reset_handler;
 	}
 
-	if (reg->command & COMMAND_COPY) {
+	if (command & COMMAND_COPY) {
 		ret = pci_epf_test_copy(epf_test);
 		if (!ret)
 			reg->status |= STATUS_COPY_SUCCESS;
@@ -308,9 +325,9 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 		goto reset_handler;
 	}
 
-	if (reg->command & COMMAND_RAISE_MSI_IRQ) {
+	if (command & COMMAND_RAISE_MSI_IRQ) {
 		msi_count = pci_epc_get_msi(epc);
-		irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
+		irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
 		if (irq > msi_count || msi_count <= 0)
 			goto reset_handler;
 		reg->status = STATUS_IRQ_RAISED;
@@ -319,8 +336,6 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	}
 
 reset_handler:
-	reg->command = 0;
-
 	queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler,
 			   msecs_to_jiffies(1));
 }
@@ -358,6 +373,7 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
 	struct pci_epc *epc = epf->epc;
 	struct device *dev = &epf->dev;
 	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
 
 	flags = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_32;
 	if (sizeof(dma_addr_t) == 0x8)
@@ -370,7 +386,7 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
 		if (ret) {
 			pci_epf_free_space(epf, epf_test->reg[bar], bar);
 			dev_err(dev, "failed to set BAR%d\n", bar);
-			if (bar == BAR_0)
+			if (bar == test_reg_bar)
 				return ret;
 		}
 	}
@@ -384,17 +400,20 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
 	struct device *dev = &epf->dev;
 	void *base;
 	int bar;
+	enum pci_barno test_reg_bar = epf_test->test_reg_bar;
 
 	base = pci_epf_alloc_space(epf, sizeof(struct pci_epf_test_reg),
-				   BAR_0);
+				   test_reg_bar);
 	if (!base) {
 		dev_err(dev, "failed to allocated register space\n");
 		return -ENOMEM;
 	}
-	epf_test->reg[0] = base;
+	epf_test->reg[test_reg_bar] = base;
 
-	for (bar = BAR_1; bar <= BAR_5; bar++) {
-		base = pci_epf_alloc_space(epf, bar_size[bar - 1], bar);
+	for (bar = BAR_0; bar <= BAR_5; bar++) {
+		if (bar == test_reg_bar)
+			continue;
+		base = pci_epf_alloc_space(epf, bar_size[bar], bar);
 		if (!base)
 			dev_err(dev, "failed to allocate space for BAR%d\n",
 				bar);
@@ -407,6 +426,7 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
 static int pci_epf_test_bind(struct pci_epf *epf)
 {
 	int ret;
+	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
 	struct pci_epf_header *header = epf->header;
 	struct pci_epc *epc = epf->epc;
 	struct device *dev = &epf->dev;
@@ -432,13 +452,34 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 	if (ret)
 		return ret;
 
+	if (!epf_test->linkup_notifier)
+		queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work);
+
 	return 0;
 }
 
+static const struct pci_epf_device_id pci_epf_test_ids[] = {
+	{
+		.name = "pci_epf_test",
+	},
+	{},
+};
+
 static int pci_epf_test_probe(struct pci_epf *epf)
 {
 	struct pci_epf_test *epf_test;
 	struct device *dev = &epf->dev;
+	const struct pci_epf_device_id *match;
+	struct pci_epf_test_data *data;
+	enum pci_barno test_reg_bar = BAR_0;
+	bool linkup_notifier = true;
+
+	match = pci_epf_match_device(pci_epf_test_ids, epf);
+	data = (struct pci_epf_test_data *)match->driver_data;
+	if (data) {
+		test_reg_bar = data->test_reg_bar;
+		linkup_notifier = data->linkup_notifier;
+	}
 
 	epf_test = devm_kzalloc(dev, sizeof(*epf_test), GFP_KERNEL);
 	if (!epf_test)
@@ -446,6 +487,8 @@ static int pci_epf_test_probe(struct pci_epf *epf)
 
 	epf->header = &test_header;
 	epf_test->epf = epf;
+	epf_test->test_reg_bar = test_reg_bar;
+	epf_test->linkup_notifier = linkup_notifier;
 
 	INIT_DELAYED_WORK(&epf_test->cmd_handler, pci_epf_test_cmd_handler);
 
@@ -453,31 +496,15 @@ static int pci_epf_test_probe(struct pci_epf *epf)
 	return 0;
 }
 
-static int pci_epf_test_remove(struct pci_epf *epf)
-{
-	struct pci_epf_test *epf_test = epf_get_drvdata(epf);
-
-	kfree(epf_test);
-	return 0;
-}
-
 static struct pci_epf_ops ops = {
 	.unbind	= pci_epf_test_unbind,
 	.bind	= pci_epf_test_bind,
 	.linkup = pci_epf_test_linkup,
 };
 
-static const struct pci_epf_device_id pci_epf_test_ids[] = {
-	{
-		.name = "pci_epf_test",
-	},
-	{},
-};
-
 static struct pci_epf_driver test_driver = {
 	.driver.name	= "pci_epf_test",
 	.probe		= pci_epf_test_probe,
-	.remove		= pci_epf_test_remove,
 	.id_table	= pci_epf_test_ids,
 	.ops		= &ops,
 	.owner		= THIS_MODULE,
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index caa7be1..42c2a11 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 
 #include <linux/pci-epc.h>
 #include <linux/pci-epf.h>
@@ -370,6 +371,7 @@ EXPORT_SYMBOL_GPL(pci_epc_write_header);
 int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf)
 {
 	unsigned long flags;
+	struct device *dev = epc->dev.parent;
 
 	if (epf->epc)
 		return -EBUSY;
@@ -381,8 +383,12 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf)
 		return -EINVAL;
 
 	epf->epc = epc;
-	dma_set_coherent_mask(&epf->dev, epc->dev.coherent_dma_mask);
-	epf->dev.dma_mask = epc->dev.dma_mask;
+	if (dev->of_node) {
+		of_dma_configure(&epf->dev, dev->of_node);
+	} else {
+		dma_set_coherent_mask(&epf->dev, epc->dev.coherent_dma_mask);
+		epf->dev.dma_mask = epc->dev.dma_mask;
+	}
 
 	spin_lock_irqsave(&epc->lock, flags);
 	list_add_tail(&epf->list, &epc->pci_epf);
@@ -500,6 +506,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
 	dma_set_coherent_mask(&epc->dev, dev->coherent_dma_mask);
 	epc->dev.class = pci_epc_class;
 	epc->dev.dma_mask = dev->dma_mask;
+	epc->dev.parent = dev;
 	epc->ops = ops;
 
 	ret = dev_set_name(&epc->dev, "%s", dev_name(dev));
diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c
index 3a94cc1..83b7d5d 100644
--- a/drivers/pci/endpoint/pci-epc-mem.c
+++ b/drivers/pci/endpoint/pci-epc-mem.c
@@ -24,21 +24,54 @@
 #include <linux/pci-epc.h>
 
 /**
- * pci_epc_mem_init() - initialize the pci_epc_mem structure
+ * pci_epc_mem_get_order() - determine the allocation order of a memory size
+ * @mem: address space of the endpoint controller
+ * @size: the size for which to get the order
+ *
+ * Reimplement get_order() for mem->page_size since the generic get_order
+ * always gets order with a constant PAGE_SIZE.
+ */
+static int pci_epc_mem_get_order(struct pci_epc_mem *mem, size_t size)
+{
+	int order;
+	unsigned int page_shift = ilog2(mem->page_size);
+
+	size--;
+	size >>= page_shift;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
+
+/**
+ * __pci_epc_mem_init() - initialize the pci_epc_mem structure
  * @epc: the EPC device that invoked pci_epc_mem_init
  * @phys_base: the physical address of the base
  * @size: the size of the address space
+ * @page_size: size of each page
  *
  * Invoke to initialize the pci_epc_mem structure used by the
  * endpoint functions to allocate mapped PCI address.
  */
-int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size)
+int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size,
+		       size_t page_size)
 {
 	int ret;
 	struct pci_epc_mem *mem;
 	unsigned long *bitmap;
-	int pages = size >> PAGE_SHIFT;
-	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+	unsigned int page_shift;
+	int pages;
+	int bitmap_size;
+
+	if (page_size < PAGE_SIZE)
+		page_size = PAGE_SIZE;
+
+	page_shift = ilog2(page_size);
+	pages = size >> page_shift;
+	bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
 
 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem) {
@@ -54,6 +87,7 @@ int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size)
 
 	mem->bitmap = bitmap;
 	mem->phys_base = phys_base;
+	mem->page_size = page_size;
 	mem->pages = pages;
 	mem->size = size;
 
@@ -67,7 +101,7 @@ int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size)
 err:
 return ret;
 }
-EXPORT_SYMBOL_GPL(pci_epc_mem_init);
+EXPORT_SYMBOL_GPL(__pci_epc_mem_init);
 
 /**
  * pci_epc_mem_exit() - cleanup the pci_epc_mem structure
@@ -101,13 +135,17 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
 	int pageno;
 	void __iomem *virt_addr;
 	struct pci_epc_mem *mem = epc->mem;
-	int order = get_order(size);
+	unsigned int page_shift = ilog2(mem->page_size);
+	int order;
+
+	size = ALIGN(size, mem->page_size);
+	order = pci_epc_mem_get_order(mem, size);
 
 	pageno = bitmap_find_free_region(mem->bitmap, mem->pages, order);
 	if (pageno < 0)
 		return NULL;
 
-	*phys_addr = mem->phys_base + (pageno << PAGE_SHIFT);
+	*phys_addr = mem->phys_base + (pageno << page_shift);
 	virt_addr = ioremap(*phys_addr, size);
 	if (!virt_addr)
 		bitmap_release_region(mem->bitmap, pageno, order);
@@ -129,11 +167,14 @@ void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
 			   void __iomem *virt_addr, size_t size)
 {
 	int pageno;
-	int order = get_order(size);
 	struct pci_epc_mem *mem = epc->mem;
+	unsigned int page_shift = ilog2(mem->page_size);
+	int order;
 
 	iounmap(virt_addr);
-	pageno = (phys_addr - mem->phys_base) >> PAGE_SHIFT;
+	pageno = (phys_addr - mem->phys_base) >> page_shift;
+	size = ALIGN(size, mem->page_size);
+	order = pci_epc_mem_get_order(mem, size);
 	bitmap_release_region(mem->bitmap, pageno, order);
 }
 EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr);
diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c
index 6877d6a..ae1611a 100644
--- a/drivers/pci/endpoint/pci-epf-core.c
+++ b/drivers/pci/endpoint/pci-epf-core.c
@@ -27,7 +27,7 @@
 #include <linux/pci-ep-cfs.h>
 
 static struct bus_type pci_epf_bus_type;
-static struct device_type pci_epf_type;
+static const struct device_type pci_epf_type;
 
 /**
  * pci_epf_linkup() - Notify the function driver that EPC device has
@@ -267,6 +267,22 @@ struct pci_epf *pci_epf_create(const char *name)
 }
 EXPORT_SYMBOL_GPL(pci_epf_create);
 
+const struct pci_epf_device_id *
+pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf)
+{
+	if (!id || !epf)
+		return NULL;
+
+	while (*id->name) {
+		if (strcmp(epf->name, id->name) == 0)
+			return id;
+		id++;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_epf_match_device);
+
 static void pci_epf_dev_release(struct device *dev)
 {
 	struct pci_epf *epf = to_pci_epf(dev);
@@ -275,7 +291,7 @@ static void pci_epf_dev_release(struct device *dev)
 	kfree(epf);
 }
 
-static struct device_type pci_epf_type = {
+static const struct device_type pci_epf_type = {
 	.release	= pci_epf_dev_release,
 };
 
@@ -317,11 +333,12 @@ static int pci_epf_device_probe(struct device *dev)
 
 static int pci_epf_device_remove(struct device *dev)
 {
-	int ret;
+	int ret = 0;
 	struct pci_epf *epf = to_pci_epf(dev);
 	struct pci_epf_driver *driver = to_pci_epf_driver(dev->driver);
 
-	ret = driver->remove(epf);
+	if (driver->remove)
+		ret = driver->remove(epf);
 	epf->driver = NULL;
 
 	return ret;
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 89d61c2..b868803 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -71,7 +71,7 @@
 
 config PCIE_XILINX
 	bool "Xilinx AXI PCIe host bridge support"
-	depends on ARCH_ZYNQ || MICROBLAZE
+	depends on ARCH_ZYNQ || MICROBLAZE || (MIPS && PCI_DRIVERS_GENERIC)
 	help
 	  Say 'Y' here if you want kernel to support the Xilinx AXI PCIe
 	  Host Bridge driver.
@@ -182,14 +182,13 @@
 
 config PCIE_MEDIATEK
 	bool "MediaTek PCIe controller"
-	depends on ARM && (ARCH_MEDIATEK || COMPILE_TEST)
+	depends on (ARM || ARM64) && (ARCH_MEDIATEK || COMPILE_TEST)
 	depends on OF
 	depends on PCI
 	select PCIEPORTBUS
 	help
 	  Say Y here if you want to enable PCIe controller support on
-	  MT7623 series SoCs.  There is one single root complex with 3 root
-	  ports available.  Each port supports Gen2 lane x1.
+	  MediaTek SoCs.
 
 config PCIE_TANGO_SMP8759
 	bool "Tango SMP8759 PCIe controller (DANGEROUS)"
diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c
index 5fb9b62..89f4e3d 100644
--- a/drivers/pci/host/pci-aardvark.c
+++ b/drivers/pci/host/pci-aardvark.c
@@ -191,7 +191,6 @@
 #define LINK_WAIT_USLEEP_MIN		90000
 #define LINK_WAIT_USLEEP_MAX		100000
 
-#define LEGACY_IRQ_NUM			4
 #define MSI_IRQ_NUM			32
 
 struct advk_pcie {
@@ -729,7 +728,7 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie)
 	irq_chip->irq_unmask = advk_pcie_irq_unmask;
 
 	pcie->irq_domain =
-		irq_domain_add_linear(pcie_intc_node, LEGACY_IRQ_NUM,
+		irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
 				      &advk_pcie_irq_domain_ops, pcie);
 	if (!pcie->irq_domain) {
 		dev_err(dev, "Failed to get a INTx IRQ domain\n");
@@ -786,7 +785,7 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
 		advk_pcie_handle_msi(pcie);
 
 	/* Process legacy interrupts */
-	for (i = 0; i < LEGACY_IRQ_NUM; i++) {
+	for (i = 0; i < PCI_NUM_INTX; i++) {
 		if (!(status & PCIE_ISR0_INTX_ASSERT(i)))
 			continue;
 
diff --git a/drivers/pci/host/pci-ftpci100.c b/drivers/pci/host/pci-ftpci100.c
index 5162dff..96028f0 100644
--- a/drivers/pci/host/pci-ftpci100.c
+++ b/drivers/pci/host/pci-ftpci100.c
@@ -350,12 +350,12 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p)
 
 	/* All PCI IRQs cascade off this one */
 	irq = of_irq_get(intc, 0);
-	if (!irq) {
+	if (irq <= 0) {
 		dev_err(p->dev, "failed to get parent IRQ\n");
-		return -EINVAL;
+		return irq ?: -EINVAL;
 	}
 
-	p->irqdomain = irq_domain_add_linear(intc, 4,
+	p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX,
 					     &faraday_pci_irqdomain_ops, p);
 	if (!p->irqdomain) {
 		dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n");
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index aba0414..0fe3ea1 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -50,6 +50,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 #include <linux/semaphore.h>
 #include <linux/irqdomain.h>
 #include <asm/irqdomain.h>
@@ -1113,7 +1114,12 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		goto free_int_desc;
 	}
 
-	wait_for_completion(&comp.comp_pkt.host_event);
+	/*
+	 * Since this function is called with IRQ locks held, can't
+	 * do normal wait for completion; instead poll.
+	 */
+	while (!try_wait_for_completion(&comp.comp_pkt.host_event))
+		udelay(100);
 
 	if (comp.comp_pkt.completion_status < 0) {
 		dev_err(&hbus->hdev->device,
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index f353a6e..8d88f19 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -1054,8 +1054,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 	port->pcie = pcie;
 
 	if (of_property_read_u32(child, "marvell,pcie-port", &port->port)) {
-		dev_warn(dev, "ignoring %s, missing pcie-port property\n",
-			 of_node_full_name(child));
+		dev_warn(dev, "ignoring %pOF, missing pcie-port property\n",
+			 child);
 		goto skip;
 	}
 
@@ -1106,8 +1106,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 		}
 
 		if (flags & OF_GPIO_ACTIVE_LOW) {
-			dev_info(dev, "%s: reset gpio is active low\n",
-				 of_node_full_name(child));
+			dev_info(dev, "%pOF: reset gpio is active low\n",
+				 child);
 			gpio_flags = GPIOF_ACTIVE_LOW |
 				     GPIOF_OUT_INIT_LOW;
 		} else {
@@ -1186,8 +1186,7 @@ static int mvebu_pcie_powerup(struct mvebu_pcie_port *port)
  */
 static void mvebu_pcie_powerdown(struct mvebu_pcie_port *port)
 {
-	if (port->reset_gpio)
-		gpiod_set_value_cansleep(port->reset_gpio, 1);
+	gpiod_set_value_cansleep(port->reset_gpio, 1);
 
 	clk_disable_unprepare(port->clk);
 }
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index b3722b7..9c40da5 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -1147,15 +1147,15 @@ static int tegra_pcie_resets_get(struct tegra_pcie *pcie)
 {
 	struct device *dev = pcie->dev;
 
-	pcie->pex_rst = devm_reset_control_get(dev, "pex");
+	pcie->pex_rst = devm_reset_control_get_exclusive(dev, "pex");
 	if (IS_ERR(pcie->pex_rst))
 		return PTR_ERR(pcie->pex_rst);
 
-	pcie->afi_rst = devm_reset_control_get(dev, "afi");
+	pcie->afi_rst = devm_reset_control_get_exclusive(dev, "afi");
 	if (IS_ERR(pcie->afi_rst))
 		return PTR_ERR(pcie->afi_rst);
 
-	pcie->pcie_xrst = devm_reset_control_get(dev, "pcie_x");
+	pcie->pcie_xrst = devm_reset_control_get_exclusive(dev, "pcie_x");
 	if (IS_ERR(pcie->pcie_xrst))
 		return PTR_ERR(pcie->pcie_xrst);
 
@@ -1703,8 +1703,7 @@ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie)
 		pcie->num_supplies = 2;
 
 	if (pcie->num_supplies == 0) {
-		dev_err(dev, "device %s not supported in legacy mode\n",
-			np->full_name);
+		dev_err(dev, "device %pOF not supported in legacy mode\n", np);
 		return -ENODEV;
 	}
 
diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
index f1b633b..1f42a20 100644
--- a/drivers/pci/host/pci-xgene-msi.c
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -489,7 +489,7 @@ static int xgene_msi_probe(struct platform_device *pdev)
 		if (virt_msir < 0) {
 			dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
 				irq_index);
-			rc = -EINVAL;
+			rc = virt_msir;
 			goto error;
 		}
 		xgene_msi->msi_groups[irq_index].gic_irq = virt_msir;
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index bd89747..0876451 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -61,7 +61,7 @@
 #define SZ_1T				(SZ_1G*1024ULL)
 #define PIPE_PHY_RATE_RD(src)		((0xc000 & (u32)(src)) >> 0xe)
 
-#define ROOT_CAP_AND_CTRL		0x5C
+#define XGENE_V1_PCI_EXP_CAP		0x40
 
 /* PCIe IP version */
 #define XGENE_PCIE_IP_VER_UNKN		0
@@ -160,7 +160,7 @@ static bool xgene_pcie_hide_rc_bars(struct pci_bus *bus, int offset)
 }
 
 static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
-			      int offset)
+					int offset)
 {
 	if ((pci_is_root_bus(bus) && devfn != 0) ||
 	    xgene_pcie_hide_rc_bars(bus, offset))
@@ -189,7 +189,7 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 	 * Avoid this by not claiming to support CRS.
 	 */
 	if (pci_is_root_bus(bus) && (port->version == XGENE_PCIE_IP_VER_1) &&
-	    ((where & ~0x3) == ROOT_CAP_AND_CTRL))
+	    ((where & ~0x3) == XGENE_V1_PCI_EXP_CAP + PCI_EXP_RTCTL))
 		*val &= ~(PCI_EXP_RTCAP_CRSVIS << 16);
 
 	if (size <= 2)
@@ -265,12 +265,12 @@ static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
 }
 
 struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
-	.bus_shift      = 16,
-	.init           = xgene_v1_pcie_ecam_init,
-	.pci_ops        = {
-		.map_bus        = xgene_pcie_map_bus,
-		.read           = xgene_pcie_config_read32,
-		.write          = pci_generic_config_write,
+	.bus_shift	= 16,
+	.init		= xgene_v1_pcie_ecam_init,
+	.pci_ops	= {
+		.map_bus	= xgene_pcie_map_bus,
+		.read		= xgene_pcie_config_read32,
+		.write		= pci_generic_config_write,
 	}
 };
 
@@ -280,12 +280,12 @@ static int xgene_v2_pcie_ecam_init(struct pci_config_window *cfg)
 }
 
 struct pci_ecam_ops xgene_v2_pcie_ecam_ops = {
-	.bus_shift      = 16,
-	.init           = xgene_v2_pcie_ecam_init,
-	.pci_ops        = {
-		.map_bus        = xgene_pcie_map_bus,
-		.read           = xgene_pcie_config_read32,
-		.write          = pci_generic_config_write,
+	.bus_shift	= 16,
+	.init		= xgene_v2_pcie_ecam_init,
+	.pci_ops	= {
+		.map_bus	= xgene_pcie_map_bus,
+		.read		= xgene_pcie_config_read32,
+		.write		= pci_generic_config_write,
 	}
 };
 #endif
@@ -318,7 +318,7 @@ static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
 }
 
 static void xgene_pcie_linkup(struct xgene_pcie_port *port,
-				   u32 *lanes, u32 *speed)
+			      u32 *lanes, u32 *speed)
 {
 	u32 val32;
 
@@ -593,8 +593,7 @@ static void xgene_pcie_clear_config(struct xgene_pcie_port *port)
 		xgene_pcie_writel(port, i, 0);
 }
 
-static int xgene_pcie_setup(struct xgene_pcie_port *port,
-			    struct list_head *res,
+static int xgene_pcie_setup(struct xgene_pcie_port *port, struct list_head *res,
 			    resource_size_t io_base)
 {
 	struct device *dev = port->dev;
@@ -706,9 +705,9 @@ static const struct of_device_id xgene_pcie_match_table[] = {
 
 static struct platform_driver xgene_pcie_driver = {
 	.driver = {
-		   .name = "xgene-pcie",
-		   .of_match_table = of_match_ptr(xgene_pcie_match_table),
-		   .suppress_bind_attrs = true,
+		.name = "xgene-pcie",
+		.of_match_table = of_match_ptr(xgene_pcie_match_table),
+		.suppress_bind_attrs = true,
 	},
 	.probe = xgene_pcie_probe_bridge,
 };
diff --git a/drivers/pci/host/pcie-altera-msi.c b/drivers/pci/host/pcie-altera-msi.c
index 4e5d628..d8141f4 100644
--- a/drivers/pci/host/pcie-altera-msi.c
+++ b/drivers/pci/host/pcie-altera-msi.c
@@ -64,13 +64,11 @@ static void altera_msi_isr(struct irq_desc *desc)
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct altera_msi *msi;
 	unsigned long status;
-	u32 num_of_vectors;
 	u32 bit;
 	u32 virq;
 
 	chained_irq_enter(chip, desc);
 	msi = irq_desc_get_handler_data(desc);
-	num_of_vectors = msi->num_of_vectors;
 
 	while ((status = msi_readl(msi, MSI_STATUS)) != 0) {
 		for_each_set_bit(bit, &status, msi->num_of_vectors) {
@@ -267,9 +265,9 @@ static int altera_msi_probe(struct platform_device *pdev)
 		return ret;
 
 	msi->irq = platform_get_irq(pdev, 0);
-	if (msi->irq <= 0) {
+	if (msi->irq < 0) {
 		dev_err(&pdev->dev, "failed to map IRQ: %d\n", msi->irq);
-		ret = -ENODEV;
+		ret = msi->irq;
 		goto err;
 	}
 
diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index 4ea4f8f..b468b8ccc 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -76,8 +76,6 @@
 #define LINK_UP_TIMEOUT			HZ
 #define LINK_RETRAIN_TIMEOUT		HZ
 
-#define INTX_NUM			4
-
 #define DWORD_MASK			3
 
 struct altera_pcie {
@@ -464,6 +462,7 @@ static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
 
 static const struct irq_domain_ops intx_domain_ops = {
 	.map = altera_pcie_intx_map,
+	.xlate = pci_irqd_intx_xlate,
 };
 
 static void altera_pcie_isr(struct irq_desc *desc)
@@ -481,11 +480,11 @@ static void altera_pcie_isr(struct irq_desc *desc)
 
 	while ((status = cra_readl(pcie, P2A_INT_STATUS)
 		& P2A_INT_STS_ALL) != 0) {
-		for_each_set_bit(bit, &status, INTX_NUM) {
+		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
 			/* clear interrupts */
 			cra_writel(pcie, 1 << bit, P2A_INT_STATUS);
 
-			virq = irq_find_mapping(pcie->irq_domain, bit + 1);
+			virq = irq_find_mapping(pcie->irq_domain, bit);
 			if (virq)
 				generic_handle_irq(virq);
 			else
@@ -536,7 +535,7 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
 	struct device_node *node = dev->of_node;
 
 	/* Setup INTx */
-	pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM + 1,
+	pcie->irq_domain = irq_domain_add_linear(node, PCI_NUM_INTX,
 					&intx_domain_ops, pcie);
 	if (!pcie->irq_domain) {
 		dev_err(dev, "Failed to get a INTx IRQ domain\n");
@@ -559,9 +558,9 @@ static int altera_pcie_parse_dt(struct altera_pcie *pcie)
 
 	/* setup IRQ */
 	pcie->irq = platform_get_irq(pdev, 0);
-	if (pcie->irq <= 0) {
+	if (pcie->irq < 0) {
 		dev_err(dev, "failed to get IRQ: %d\n", pcie->irq);
-		return -EINVAL;
+		return pcie->irq;
 	}
 
 	irq_set_chained_handler_and_data(pcie->irq, altera_pcie_isr, pcie);
diff --git a/drivers/pci/host/pcie-iproc-msi.c b/drivers/pci/host/pcie-iproc-msi.c
index 9fad791..2d0f535 100644
--- a/drivers/pci/host/pcie-iproc-msi.c
+++ b/drivers/pci/host/pcie-iproc-msi.c
@@ -317,7 +317,6 @@ static void iproc_msi_handler(struct irq_desc *desc)
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct iproc_msi_grp *grp;
 	struct iproc_msi *msi;
-	struct iproc_pcie *pcie;
 	u32 eq, head, tail, nr_events;
 	unsigned long hwirq;
 	int virq;
@@ -326,7 +325,6 @@ static void iproc_msi_handler(struct irq_desc *desc)
 
 	grp = irq_desc_get_handler_data(desc);
 	msi = grp->msi;
-	pcie = msi->pcie;
 	eq = grp->eq;
 
 	/*
diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
index 2253119..a5073a9 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -134,6 +134,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
 	return iproc_pcie_remove(pcie);
 }
 
+static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev)
+{
+	struct iproc_pcie *pcie = platform_get_drvdata(pdev);
+
+	iproc_pcie_shutdown(pcie);
+}
+
 static struct platform_driver iproc_pcie_pltfm_driver = {
 	.driver = {
 		.name = "iproc-pcie",
@@ -141,6 +148,7 @@ static struct platform_driver iproc_pcie_pltfm_driver = {
 	},
 	.probe = iproc_pcie_pltfm_probe,
 	.remove = iproc_pcie_pltfm_remove,
+	.shutdown = iproc_pcie_pltfm_shutdown,
 };
 module_platform_driver(iproc_pcie_pltfm_driver);
 
diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c
index c574863..3a8b9d2 100644
--- a/drivers/pci/host/pcie-iproc.c
+++ b/drivers/pci/host/pcie-iproc.c
@@ -31,68 +31,71 @@
 
 #include "pcie-iproc.h"
 
-#define EP_PERST_SOURCE_SELECT_SHIFT 2
-#define EP_PERST_SOURCE_SELECT       BIT(EP_PERST_SOURCE_SELECT_SHIFT)
-#define EP_MODE_SURVIVE_PERST_SHIFT  1
-#define EP_MODE_SURVIVE_PERST        BIT(EP_MODE_SURVIVE_PERST_SHIFT)
-#define RC_PCIE_RST_OUTPUT_SHIFT     0
-#define RC_PCIE_RST_OUTPUT           BIT(RC_PCIE_RST_OUTPUT_SHIFT)
-#define PAXC_RESET_MASK              0x7f
+#define EP_PERST_SOURCE_SELECT_SHIFT	2
+#define EP_PERST_SOURCE_SELECT		BIT(EP_PERST_SOURCE_SELECT_SHIFT)
+#define EP_MODE_SURVIVE_PERST_SHIFT	1
+#define EP_MODE_SURVIVE_PERST		BIT(EP_MODE_SURVIVE_PERST_SHIFT)
+#define RC_PCIE_RST_OUTPUT_SHIFT	0
+#define RC_PCIE_RST_OUTPUT		BIT(RC_PCIE_RST_OUTPUT_SHIFT)
+#define PAXC_RESET_MASK			0x7f
 
-#define GIC_V3_CFG_SHIFT             0
-#define GIC_V3_CFG                   BIT(GIC_V3_CFG_SHIFT)
+#define GIC_V3_CFG_SHIFT		0
+#define GIC_V3_CFG			BIT(GIC_V3_CFG_SHIFT)
 
-#define MSI_ENABLE_CFG_SHIFT         0
-#define MSI_ENABLE_CFG               BIT(MSI_ENABLE_CFG_SHIFT)
+#define MSI_ENABLE_CFG_SHIFT		0
+#define MSI_ENABLE_CFG			BIT(MSI_ENABLE_CFG_SHIFT)
 
-#define CFG_IND_ADDR_MASK            0x00001ffc
+#define CFG_IND_ADDR_MASK		0x00001ffc
 
-#define CFG_ADDR_BUS_NUM_SHIFT       20
-#define CFG_ADDR_BUS_NUM_MASK        0x0ff00000
-#define CFG_ADDR_DEV_NUM_SHIFT       15
-#define CFG_ADDR_DEV_NUM_MASK        0x000f8000
-#define CFG_ADDR_FUNC_NUM_SHIFT      12
-#define CFG_ADDR_FUNC_NUM_MASK       0x00007000
-#define CFG_ADDR_REG_NUM_SHIFT       2
-#define CFG_ADDR_REG_NUM_MASK        0x00000ffc
-#define CFG_ADDR_CFG_TYPE_SHIFT      0
-#define CFG_ADDR_CFG_TYPE_MASK       0x00000003
+#define CFG_ADDR_BUS_NUM_SHIFT		20
+#define CFG_ADDR_BUS_NUM_MASK		0x0ff00000
+#define CFG_ADDR_DEV_NUM_SHIFT		15
+#define CFG_ADDR_DEV_NUM_MASK		0x000f8000
+#define CFG_ADDR_FUNC_NUM_SHIFT		12
+#define CFG_ADDR_FUNC_NUM_MASK		0x00007000
+#define CFG_ADDR_REG_NUM_SHIFT		2
+#define CFG_ADDR_REG_NUM_MASK		0x00000ffc
+#define CFG_ADDR_CFG_TYPE_SHIFT		0
+#define CFG_ADDR_CFG_TYPE_MASK		0x00000003
 
-#define SYS_RC_INTX_MASK             0xf
+#define SYS_RC_INTX_MASK		0xf
 
-#define PCIE_PHYLINKUP_SHIFT         3
-#define PCIE_PHYLINKUP               BIT(PCIE_PHYLINKUP_SHIFT)
-#define PCIE_DL_ACTIVE_SHIFT         2
-#define PCIE_DL_ACTIVE               BIT(PCIE_DL_ACTIVE_SHIFT)
+#define PCIE_PHYLINKUP_SHIFT		3
+#define PCIE_PHYLINKUP			BIT(PCIE_PHYLINKUP_SHIFT)
+#define PCIE_DL_ACTIVE_SHIFT		2
+#define PCIE_DL_ACTIVE			BIT(PCIE_DL_ACTIVE_SHIFT)
 
-#define APB_ERR_EN_SHIFT             0
-#define APB_ERR_EN                   BIT(APB_ERR_EN_SHIFT)
+#define APB_ERR_EN_SHIFT		0
+#define APB_ERR_EN			BIT(APB_ERR_EN_SHIFT)
+
+#define CFG_RETRY_STATUS		0xffff0001
+#define CFG_RETRY_STATUS_TIMEOUT_US	500000 /* 500 milliseconds */
 
 /* derive the enum index of the outbound/inbound mapping registers */
-#define MAP_REG(base_reg, index)      ((base_reg) + (index) * 2)
+#define MAP_REG(base_reg, index)	((base_reg) + (index) * 2)
 
 /*
  * Maximum number of outbound mapping window sizes that can be supported by any
  * OARR/OMAP mapping pair
  */
-#define MAX_NUM_OB_WINDOW_SIZES      4
+#define MAX_NUM_OB_WINDOW_SIZES		4
 
-#define OARR_VALID_SHIFT             0
-#define OARR_VALID                   BIT(OARR_VALID_SHIFT)
-#define OARR_SIZE_CFG_SHIFT          1
+#define OARR_VALID_SHIFT		0
+#define OARR_VALID			BIT(OARR_VALID_SHIFT)
+#define OARR_SIZE_CFG_SHIFT		1
 
 /*
  * Maximum number of inbound mapping region sizes that can be supported by an
  * IARR
  */
-#define MAX_NUM_IB_REGION_SIZES      9
+#define MAX_NUM_IB_REGION_SIZES		9
 
-#define IMAP_VALID_SHIFT             0
-#define IMAP_VALID                   BIT(IMAP_VALID_SHIFT)
+#define IMAP_VALID_SHIFT		0
+#define IMAP_VALID			BIT(IMAP_VALID_SHIFT)
 
-#define PCI_EXP_CAP			0xac
+#define IPROC_PCI_EXP_CAP		0xac
 
-#define IPROC_PCIE_REG_INVALID 0xffff
+#define IPROC_PCIE_REG_INVALID		0xffff
 
 /**
  * iProc PCIe outbound mapping controller specific parameters
@@ -304,80 +307,80 @@ enum iproc_pcie_reg {
 
 /* iProc PCIe PAXB BCMA registers */
 static const u16 iproc_pcie_reg_paxb_bcma[] = {
-	[IPROC_PCIE_CLK_CTRL]         = 0x000,
-	[IPROC_PCIE_CFG_IND_ADDR]     = 0x120,
-	[IPROC_PCIE_CFG_IND_DATA]     = 0x124,
-	[IPROC_PCIE_CFG_ADDR]         = 0x1f8,
-	[IPROC_PCIE_CFG_DATA]         = 0x1fc,
-	[IPROC_PCIE_INTX_EN]          = 0x330,
-	[IPROC_PCIE_LINK_STATUS]      = 0xf0c,
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
 };
 
 /* iProc PCIe PAXB registers */
 static const u16 iproc_pcie_reg_paxb[] = {
-	[IPROC_PCIE_CLK_CTRL]         = 0x000,
-	[IPROC_PCIE_CFG_IND_ADDR]     = 0x120,
-	[IPROC_PCIE_CFG_IND_DATA]     = 0x124,
-	[IPROC_PCIE_CFG_ADDR]         = 0x1f8,
-	[IPROC_PCIE_CFG_DATA]         = 0x1fc,
-	[IPROC_PCIE_INTX_EN]          = 0x330,
-	[IPROC_PCIE_OARR0]            = 0xd20,
-	[IPROC_PCIE_OMAP0]            = 0xd40,
-	[IPROC_PCIE_OARR1]            = 0xd28,
-	[IPROC_PCIE_OMAP1]            = 0xd48,
-	[IPROC_PCIE_LINK_STATUS]      = 0xf0c,
-	[IPROC_PCIE_APB_ERR_EN]       = 0xf40,
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_OARR0]		= 0xd20,
+	[IPROC_PCIE_OMAP0]		= 0xd40,
+	[IPROC_PCIE_OARR1]		= 0xd28,
+	[IPROC_PCIE_OMAP1]		= 0xd48,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
+	[IPROC_PCIE_APB_ERR_EN]		= 0xf40,
 };
 
 /* iProc PCIe PAXB v2 registers */
 static const u16 iproc_pcie_reg_paxb_v2[] = {
-	[IPROC_PCIE_CLK_CTRL]         = 0x000,
-	[IPROC_PCIE_CFG_IND_ADDR]     = 0x120,
-	[IPROC_PCIE_CFG_IND_DATA]     = 0x124,
-	[IPROC_PCIE_CFG_ADDR]         = 0x1f8,
-	[IPROC_PCIE_CFG_DATA]         = 0x1fc,
-	[IPROC_PCIE_INTX_EN]          = 0x330,
-	[IPROC_PCIE_OARR0]            = 0xd20,
-	[IPROC_PCIE_OMAP0]            = 0xd40,
-	[IPROC_PCIE_OARR1]            = 0xd28,
-	[IPROC_PCIE_OMAP1]            = 0xd48,
-	[IPROC_PCIE_OARR2]            = 0xd60,
-	[IPROC_PCIE_OMAP2]            = 0xd68,
-	[IPROC_PCIE_OARR3]            = 0xdf0,
-	[IPROC_PCIE_OMAP3]            = 0xdf8,
-	[IPROC_PCIE_IARR0]            = 0xd00,
-	[IPROC_PCIE_IMAP0]            = 0xc00,
-	[IPROC_PCIE_IARR2]            = 0xd10,
-	[IPROC_PCIE_IMAP2]            = 0xcc0,
-	[IPROC_PCIE_IARR3]            = 0xe00,
-	[IPROC_PCIE_IMAP3]            = 0xe08,
-	[IPROC_PCIE_IARR4]            = 0xe68,
-	[IPROC_PCIE_IMAP4]            = 0xe70,
-	[IPROC_PCIE_LINK_STATUS]      = 0xf0c,
-	[IPROC_PCIE_APB_ERR_EN]       = 0xf40,
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x120,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x124,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
+	[IPROC_PCIE_INTX_EN]		= 0x330,
+	[IPROC_PCIE_OARR0]		= 0xd20,
+	[IPROC_PCIE_OMAP0]		= 0xd40,
+	[IPROC_PCIE_OARR1]		= 0xd28,
+	[IPROC_PCIE_OMAP1]		= 0xd48,
+	[IPROC_PCIE_OARR2]		= 0xd60,
+	[IPROC_PCIE_OMAP2]		= 0xd68,
+	[IPROC_PCIE_OARR3]		= 0xdf0,
+	[IPROC_PCIE_OMAP3]		= 0xdf8,
+	[IPROC_PCIE_IARR0]		= 0xd00,
+	[IPROC_PCIE_IMAP0]		= 0xc00,
+	[IPROC_PCIE_IARR2]		= 0xd10,
+	[IPROC_PCIE_IMAP2]		= 0xcc0,
+	[IPROC_PCIE_IARR3]		= 0xe00,
+	[IPROC_PCIE_IMAP3]		= 0xe08,
+	[IPROC_PCIE_IARR4]		= 0xe68,
+	[IPROC_PCIE_IMAP4]		= 0xe70,
+	[IPROC_PCIE_LINK_STATUS]	= 0xf0c,
+	[IPROC_PCIE_APB_ERR_EN]		= 0xf40,
 };
 
 /* iProc PCIe PAXC v1 registers */
 static const u16 iproc_pcie_reg_paxc[] = {
-	[IPROC_PCIE_CLK_CTRL]         = 0x000,
-	[IPROC_PCIE_CFG_IND_ADDR]     = 0x1f0,
-	[IPROC_PCIE_CFG_IND_DATA]     = 0x1f4,
-	[IPROC_PCIE_CFG_ADDR]         = 0x1f8,
-	[IPROC_PCIE_CFG_DATA]         = 0x1fc,
+	[IPROC_PCIE_CLK_CTRL]		= 0x000,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x1f0,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x1f4,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
 };
 
 /* iProc PCIe PAXC v2 registers */
 static const u16 iproc_pcie_reg_paxc_v2[] = {
-	[IPROC_PCIE_MSI_GIC_MODE]     = 0x050,
-	[IPROC_PCIE_MSI_BASE_ADDR]    = 0x074,
-	[IPROC_PCIE_MSI_WINDOW_SIZE]  = 0x078,
-	[IPROC_PCIE_MSI_ADDR_LO]      = 0x07c,
-	[IPROC_PCIE_MSI_ADDR_HI]      = 0x080,
-	[IPROC_PCIE_MSI_EN_CFG]       = 0x09c,
-	[IPROC_PCIE_CFG_IND_ADDR]     = 0x1f0,
-	[IPROC_PCIE_CFG_IND_DATA]     = 0x1f4,
-	[IPROC_PCIE_CFG_ADDR]         = 0x1f8,
-	[IPROC_PCIE_CFG_DATA]         = 0x1fc,
+	[IPROC_PCIE_MSI_GIC_MODE]	= 0x050,
+	[IPROC_PCIE_MSI_BASE_ADDR]	= 0x074,
+	[IPROC_PCIE_MSI_WINDOW_SIZE]	= 0x078,
+	[IPROC_PCIE_MSI_ADDR_LO]	= 0x07c,
+	[IPROC_PCIE_MSI_ADDR_HI]	= 0x080,
+	[IPROC_PCIE_MSI_EN_CFG]		= 0x09c,
+	[IPROC_PCIE_CFG_IND_ADDR]	= 0x1f0,
+	[IPROC_PCIE_CFG_IND_DATA]	= 0x1f4,
+	[IPROC_PCIE_CFG_ADDR]		= 0x1f8,
+	[IPROC_PCIE_CFG_DATA]		= 0x1fc,
 };
 
 static inline struct iproc_pcie *iproc_data(struct pci_bus *bus)
@@ -448,18 +451,112 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus,
 	}
 }
 
+static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie,
+					       unsigned int busno,
+					       unsigned int slot,
+					       unsigned int fn,
+					       int where)
+{
+	u16 offset;
+	u32 val;
+
+	/* EP device access */
+	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
+		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
+		(fn << CFG_ADDR_FUNC_NUM_SHIFT) |
+		(where & CFG_ADDR_REG_NUM_MASK) |
+		(1 & CFG_ADDR_CFG_TYPE_MASK);
+
+	iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
+	offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
+
+	if (iproc_pcie_reg_is_invalid(offset))
+		return NULL;
+
+	return (pcie->base + offset);
+}
+
+static unsigned int iproc_pcie_cfg_retry(void __iomem *cfg_data_p)
+{
+	int timeout = CFG_RETRY_STATUS_TIMEOUT_US;
+	unsigned int data;
+
+	/*
+	 * As per PCIe spec r3.1, sec 2.3.2, CRS Software Visibility only
+	 * affects config reads of the Vendor ID.  For config writes or any
+	 * other config reads, the Root may automatically reissue the
+	 * configuration request again as a new request.
+	 *
+	 * For config reads, this hardware returns CFG_RETRY_STATUS data
+	 * when it receives a CRS completion, regardless of the address of
+	 * the read or the CRS Software Visibility Enable bit.  As a
+	 * partial workaround for this, we retry in software any read that
+	 * returns CFG_RETRY_STATUS.
+	 *
+	 * Note that a non-Vendor ID config register may have a value of
+	 * CFG_RETRY_STATUS.  If we read that, we can't distinguish it from
+	 * a CRS completion, so we will incorrectly retry the read and
+	 * eventually return the wrong data (0xffffffff).
+	 */
+	data = readl(cfg_data_p);
+	while (data == CFG_RETRY_STATUS && timeout--) {
+		udelay(1);
+		data = readl(cfg_data_p);
+	}
+
+	if (data == CFG_RETRY_STATUS)
+		data = 0xffffffff;
+
+	return data;
+}
+
+static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, u32 *val)
+{
+	struct iproc_pcie *pcie = iproc_data(bus);
+	unsigned int slot = PCI_SLOT(devfn);
+	unsigned int fn = PCI_FUNC(devfn);
+	unsigned int busno = bus->number;
+	void __iomem *cfg_data_p;
+	unsigned int data;
+	int ret;
+
+	/* root complex access */
+	if (busno == 0) {
+		ret = pci_generic_config_read32(bus, devfn, where, size, val);
+		if (ret != PCIBIOS_SUCCESSFUL)
+			return ret;
+
+		/* Don't advertise CRS SV support */
+		if ((where & ~0x3) == IPROC_PCI_EXP_CAP + PCI_EXP_RTCTL)
+			*val &= ~(PCI_EXP_RTCAP_CRSVIS << 16);
+		return PCIBIOS_SUCCESSFUL;
+	}
+
+	cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
+
+	if (!cfg_data_p)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	data = iproc_pcie_cfg_retry(cfg_data_p);
+
+	*val = data;
+	if (size <= 2)
+		*val = (data >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
 /**
  * Note access to the configuration registers are protected at the higher layer
  * by 'pci_lock' in drivers/pci/access.c
  */
 static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie,
-					    int busno,
-					    unsigned int devfn,
+					    int busno, unsigned int devfn,
 					    int where)
 {
 	unsigned slot = PCI_SLOT(devfn);
 	unsigned fn = PCI_FUNC(devfn);
-	u32 val;
 	u16 offset;
 
 	/* root complex access */
@@ -484,18 +581,7 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie,
 		if (slot > 0)
 			return NULL;
 
-	/* EP device access */
-	val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
-		(slot << CFG_ADDR_DEV_NUM_SHIFT) |
-		(fn << CFG_ADDR_FUNC_NUM_SHIFT) |
-		(where & CFG_ADDR_REG_NUM_MASK) |
-		(1 & CFG_ADDR_CFG_TYPE_MASK);
-	iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
-	offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
-	if (iproc_pcie_reg_is_invalid(offset))
-		return NULL;
-	else
-		return (pcie->base + offset);
+	return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where);
 }
 
 static void __iomem *iproc_pcie_bus_map_cfg_bus(struct pci_bus *bus,
@@ -554,9 +640,13 @@ static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 				    int where, int size, u32 *val)
 {
 	int ret;
+	struct iproc_pcie *pcie = iproc_data(bus);
 
 	iproc_pcie_apb_err_disable(bus, true);
-	ret = pci_generic_config_read32(bus, devfn, where, size, val);
+	if (pcie->type == IPROC_PCIE_PAXB_V2)
+		ret = iproc_pcie_config_read(bus, devfn, where, size, val);
+	else
+		ret = pci_generic_config_read32(bus, devfn, where, size, val);
 	iproc_pcie_apb_err_disable(bus, false);
 
 	return ret;
@@ -580,7 +670,7 @@ static struct pci_ops iproc_pcie_ops = {
 	.write = iproc_pcie_config_write32,
 };
 
-static void iproc_pcie_reset(struct iproc_pcie *pcie)
+static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert)
 {
 	u32 val;
 
@@ -592,26 +682,33 @@ static void iproc_pcie_reset(struct iproc_pcie *pcie)
 	if (pcie->ep_is_internal)
 		return;
 
-	/*
-	 * Select perst_b signal as reset source. Put the device into reset,
-	 * and then bring it out of reset
-	 */
-	val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
-	val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
-		~RC_PCIE_RST_OUTPUT;
-	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
-	udelay(250);
-
-	val |= RC_PCIE_RST_OUTPUT;
-	iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
-	msleep(100);
+	if (assert) {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
+			~RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		udelay(250);
+	} else {
+		val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+		val |= RC_PCIE_RST_OUTPUT;
+		iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+		msleep(100);
+	}
 }
 
+int iproc_pcie_shutdown(struct iproc_pcie *pcie)
+{
+	iproc_pcie_perst_ctrl(pcie, true);
+	msleep(500);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iproc_pcie_shutdown);
+
 static int iproc_pcie_check_link(struct iproc_pcie *pcie)
 {
 	struct device *dev = pcie->dev;
 	u32 hdr_type, link_ctrl, link_status, class, val;
-	u16 pos = PCI_EXP_CAP;
 	bool link_is_active = false;
 
 	/*
@@ -628,16 +725,16 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie)
 	}
 
 	/* make sure we are not in EP mode */
-	iproc_pci_raw_config_read32(pcie,  0, PCI_HEADER_TYPE, 1, &hdr_type);
+	iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, &hdr_type);
 	if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) {
 		dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type);
 		return -EFAULT;
 	}
 
 	/* force class to PCI_CLASS_BRIDGE_PCI (0x0604) */
-#define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c
-#define PCI_CLASS_BRIDGE_MASK      0xffff00
-#define PCI_CLASS_BRIDGE_SHIFT     8
+#define PCI_BRIDGE_CTRL_REG_OFFSET	0x43c
+#define PCI_CLASS_BRIDGE_MASK		0xffff00
+#define PCI_CLASS_BRIDGE_SHIFT		8
 	iproc_pci_raw_config_read32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
 				    4, &class);
 	class &= ~PCI_CLASS_BRIDGE_MASK;
@@ -646,31 +743,31 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie)
 				     4, class);
 
 	/* check link status to see if link is active */
-	iproc_pci_raw_config_read32(pcie, 0, pos + PCI_EXP_LNKSTA,
+	iproc_pci_raw_config_read32(pcie, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
 				    2, &link_status);
 	if (link_status & PCI_EXP_LNKSTA_NLW)
 		link_is_active = true;
 
 	if (!link_is_active) {
 		/* try GEN 1 link speed */
-#define PCI_TARGET_LINK_SPEED_MASK    0xf
-#define PCI_TARGET_LINK_SPEED_GEN2    0x2
-#define PCI_TARGET_LINK_SPEED_GEN1    0x1
+#define PCI_TARGET_LINK_SPEED_MASK	0xf
+#define PCI_TARGET_LINK_SPEED_GEN2	0x2
+#define PCI_TARGET_LINK_SPEED_GEN1	0x1
 		iproc_pci_raw_config_read32(pcie, 0,
-					  pos + PCI_EXP_LNKCTL2, 4,
-					  &link_ctrl);
+					    IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
+					    4, &link_ctrl);
 		if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) ==
 		    PCI_TARGET_LINK_SPEED_GEN2) {
 			link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK;
 			link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1;
 			iproc_pci_raw_config_write32(pcie, 0,
-						pos + PCI_EXP_LNKCTL2,
-						4, link_ctrl);
+					IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
+					4, link_ctrl);
 			msleep(100);
 
 			iproc_pci_raw_config_read32(pcie, 0,
-						pos + PCI_EXP_LNKSTA,
-						2, &link_status);
+					IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
+					2, &link_status);
 			if (link_status & PCI_EXP_LNKSTA_NLW)
 				link_is_active = true;
 		}
@@ -1223,6 +1320,8 @@ static int iproc_pcie_rev_init(struct iproc_pcie *pcie)
 		pcie->ib.nr_regions = ARRAY_SIZE(paxb_v2_ib_map);
 		pcie->ib_map = paxb_v2_ib_map;
 		pcie->need_msi_steer = true;
+		dev_warn(dev, "reads of config registers that contain %#x return incorrect data\n",
+			 CFG_RETRY_STATUS);
 		break;
 	case IPROC_PCIE_PAXC:
 		regs = iproc_pcie_reg_paxc;
@@ -1286,7 +1385,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res)
 		goto err_exit_phy;
 	}
 
-	iproc_pcie_reset(pcie);
+	iproc_pcie_perst_ctrl(pcie, true);
+	iproc_pcie_perst_ctrl(pcie, false);
 
 	if (pcie->need_ob_cfg) {
 		ret = iproc_pcie_map_ranges(pcie, res);
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
index 0bbe2ea..a6b55ce 100644
--- a/drivers/pci/host/pcie-iproc.h
+++ b/drivers/pci/host/pcie-iproc.h
@@ -110,6 +110,7 @@ struct iproc_pcie {
 
 int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res);
 int iproc_pcie_remove(struct iproc_pcie *pcie);
+int iproc_pcie_shutdown(struct iproc_pcie *pcie);
 
 #ifdef CONFIG_PCIE_IPROC_MSI
 int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node);
diff --git a/drivers/pci/host/pcie-mediatek.c b/drivers/pci/host/pcie-mediatek.c
index 5a9d858..db93efd 100644
--- a/drivers/pci/host/pcie-mediatek.c
+++ b/drivers/pci/host/pcie-mediatek.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2017 MediaTek Inc.
  * Author: Ryder Lee <ryder.lee@mediatek.com>
+ *	   Honghui Zhang <honghui.zhang@mediatek.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -16,6 +17,9 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
@@ -63,16 +67,104 @@
 #define PCIE_FC_CREDIT_MASK	(GENMASK(31, 31) | GENMASK(28, 16))
 #define PCIE_FC_CREDIT_VAL(x)	((x) << 16)
 
+/* PCIe V2 share registers */
+#define PCIE_SYS_CFG_V2		0x0
+#define PCIE_CSR_LTSSM_EN(x)	BIT(0 + (x) * 8)
+#define PCIE_CSR_ASPM_L1_EN(x)	BIT(1 + (x) * 8)
+
+/* PCIe V2 per-port registers */
+#define PCIE_MSI_VECTOR		0x0c0
+#define PCIE_INT_MASK		0x420
+#define INTX_MASK		GENMASK(19, 16)
+#define INTX_SHIFT		16
+#define PCIE_INT_STATUS		0x424
+#define MSI_STATUS		BIT(23)
+#define PCIE_IMSI_STATUS	0x42c
+#define PCIE_IMSI_ADDR		0x430
+#define MSI_MASK		BIT(23)
+#define MTK_MSI_IRQS_NUM	32
+
+#define PCIE_AHB_TRANS_BASE0_L	0x438
+#define PCIE_AHB_TRANS_BASE0_H	0x43c
+#define AHB2PCIE_SIZE(x)	((x) & GENMASK(4, 0))
+#define PCIE_AXI_WINDOW0	0x448
+#define WIN_ENABLE		BIT(7)
+
+/* PCIe V2 configuration transaction header */
+#define PCIE_CFG_HEADER0	0x460
+#define PCIE_CFG_HEADER1	0x464
+#define PCIE_CFG_HEADER2	0x468
+#define PCIE_CFG_WDATA		0x470
+#define PCIE_APP_TLP_REQ	0x488
+#define PCIE_CFG_RDATA		0x48c
+#define APP_CFG_REQ		BIT(0)
+#define APP_CPL_STATUS		GENMASK(7, 5)
+
+#define CFG_WRRD_TYPE_0		4
+#define CFG_WR_FMT		2
+#define CFG_RD_FMT		0
+
+#define CFG_DW0_LENGTH(length)	((length) & GENMASK(9, 0))
+#define CFG_DW0_TYPE(type)	(((type) << 24) & GENMASK(28, 24))
+#define CFG_DW0_FMT(fmt)	(((fmt) << 29) & GENMASK(31, 29))
+#define CFG_DW2_REGN(regn)	((regn) & GENMASK(11, 2))
+#define CFG_DW2_FUN(fun)	(((fun) << 16) & GENMASK(18, 16))
+#define CFG_DW2_DEV(dev)	(((dev) << 19) & GENMASK(23, 19))
+#define CFG_DW2_BUS(bus)	(((bus) << 24) & GENMASK(31, 24))
+#define CFG_HEADER_DW0(type, fmt) \
+	(CFG_DW0_LENGTH(1) | CFG_DW0_TYPE(type) | CFG_DW0_FMT(fmt))
+#define CFG_HEADER_DW1(where, size) \
+	(GENMASK(((size) - 1), 0) << ((where) & 0x3))
+#define CFG_HEADER_DW2(regn, fun, dev, bus) \
+	(CFG_DW2_REGN(regn) | CFG_DW2_FUN(fun) | \
+	CFG_DW2_DEV(dev) | CFG_DW2_BUS(bus))
+
+#define PCIE_RST_CTRL		0x510
+#define PCIE_PHY_RSTB		BIT(0)
+#define PCIE_PIPE_SRSTB		BIT(1)
+#define PCIE_MAC_SRSTB		BIT(2)
+#define PCIE_CRSTB		BIT(3)
+#define PCIE_PERSTB		BIT(8)
+#define PCIE_LINKDOWN_RST_EN	GENMASK(15, 13)
+#define PCIE_LINK_STATUS_V2	0x804
+#define PCIE_PORT_LINKUP_V2	BIT(10)
+
+struct mtk_pcie_port;
+
+/**
+ * struct mtk_pcie_soc - differentiate between host generations
+ * @has_msi: whether this host supports MSI interrupts or not
+ * @ops: pointer to configuration access functions
+ * @startup: pointer to controller setting functions
+ * @setup_irq: pointer to initialize IRQ functions
+ */
+struct mtk_pcie_soc {
+	bool has_msi;
+	struct pci_ops *ops;
+	int (*startup)(struct mtk_pcie_port *port);
+	int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node);
+};
+
 /**
  * struct mtk_pcie_port - PCIe port information
  * @base: IO mapped register base
  * @list: port list
  * @pcie: pointer to PCIe host info
  * @reset: pointer to port reset control
- * @sys_ck: pointer to bus clock
- * @phy: pointer to phy control block
+ * @sys_ck: pointer to transaction/data link layer clock
+ * @ahb_ck: pointer to AHB slave interface operating clock for CSR access
+ *          and RC initiated MMIO access
+ * @axi_ck: pointer to application layer MMIO channel operating clock
+ * @aux_ck: pointer to pe2_mac_bridge and pe2_mac_core operating clock
+ *          when pcie_mac_ck/pcie_pipe_ck is turned off
+ * @obff_ck: pointer to OBFF functional block operating clock
+ * @pipe_ck: pointer to LTSSM and PHY/MAC layer operating clock
+ * @phy: pointer to PHY control block
  * @lane: lane count
- * @index: port index
+ * @slot: port slot
+ * @irq_domain: legacy INTx IRQ domain
+ * @msi_domain: MSI IRQ domain
+ * @msi_irq_in_use: bit map for assigned MSI IRQ
  */
 struct mtk_pcie_port {
 	void __iomem *base;
@@ -80,9 +172,17 @@ struct mtk_pcie_port {
 	struct mtk_pcie *pcie;
 	struct reset_control *reset;
 	struct clk *sys_ck;
+	struct clk *ahb_ck;
+	struct clk *axi_ck;
+	struct clk *aux_ck;
+	struct clk *obff_ck;
+	struct clk *pipe_ck;
 	struct phy *phy;
 	u32 lane;
-	u32 index;
+	u32 slot;
+	struct irq_domain *irq_domain;
+	struct irq_domain *msi_domain;
+	DECLARE_BITMAP(msi_irq_in_use, MTK_MSI_IRQS_NUM);
 };
 
 /**
@@ -96,6 +196,7 @@ struct mtk_pcie_port {
  * @busn: bus range
  * @offset: IO / Memory offset
  * @ports: pointer to PCIe port information
+ * @soc: pointer to SoC-dependent operations
  */
 struct mtk_pcie {
 	struct device *dev;
@@ -111,13 +212,9 @@ struct mtk_pcie {
 		resource_size_t io;
 	} offset;
 	struct list_head ports;
+	const struct mtk_pcie_soc *soc;
 };
 
-static inline bool mtk_pcie_link_up(struct mtk_pcie_port *port)
-{
-	return !!(readl(port->base + PCIE_LINK_STATUS) & PCIE_PORT_LINKUP);
-}
-
 static void mtk_pcie_subsys_powerdown(struct mtk_pcie *pcie)
 {
 	struct device *dev = pcie->dev;
@@ -146,6 +243,12 @@ static void mtk_pcie_put_resources(struct mtk_pcie *pcie)
 
 	list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
 		phy_power_off(port->phy);
+		phy_exit(port->phy);
+		clk_disable_unprepare(port->pipe_ck);
+		clk_disable_unprepare(port->obff_ck);
+		clk_disable_unprepare(port->axi_ck);
+		clk_disable_unprepare(port->aux_ck);
+		clk_disable_unprepare(port->ahb_ck);
 		clk_disable_unprepare(port->sys_ck);
 		mtk_pcie_port_free(port);
 	}
@@ -153,11 +256,412 @@ static void mtk_pcie_put_resources(struct mtk_pcie *pcie)
 	mtk_pcie_subsys_powerdown(pcie);
 }
 
+static int mtk_pcie_check_cfg_cpld(struct mtk_pcie_port *port)
+{
+	u32 val;
+	int err;
+
+	err = readl_poll_timeout_atomic(port->base + PCIE_APP_TLP_REQ, val,
+					!(val & APP_CFG_REQ), 10,
+					100 * USEC_PER_MSEC);
+	if (err)
+		return PCIBIOS_SET_FAILED;
+
+	if (readl(port->base + PCIE_APP_TLP_REQ) & APP_CPL_STATUS)
+		return PCIBIOS_SET_FAILED;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mtk_pcie_hw_rd_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn,
+			      int where, int size, u32 *val)
+{
+	u32 tmp;
+
+	/* Write PCIe configuration transaction header for Cfgrd */
+	writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_RD_FMT),
+	       port->base + PCIE_CFG_HEADER0);
+	writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1);
+	writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus),
+	       port->base + PCIE_CFG_HEADER2);
+
+	/* Trigger h/w to transmit Cfgrd TLP */
+	tmp = readl(port->base + PCIE_APP_TLP_REQ);
+	tmp |= APP_CFG_REQ;
+	writel(tmp, port->base + PCIE_APP_TLP_REQ);
+
+	/* Check completion status */
+	if (mtk_pcie_check_cfg_cpld(port))
+		return PCIBIOS_SET_FAILED;
+
+	/* Read cpld payload of Cfgrd */
+	*val = readl(port->base + PCIE_CFG_RDATA);
+
+	if (size == 1)
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (8 * (where & 3))) & 0xffff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int mtk_pcie_hw_wr_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn,
+			      int where, int size, u32 val)
+{
+	/* Write PCIe configuration transaction header for Cfgwr */
+	writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_WR_FMT),
+	       port->base + PCIE_CFG_HEADER0);
+	writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1);
+	writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus),
+	       port->base + PCIE_CFG_HEADER2);
+
+	/* Write Cfgwr data */
+	val = val << 8 * (where & 3);
+	writel(val, port->base + PCIE_CFG_WDATA);
+
+	/* Trigger h/w to transmit Cfgwr TLP */
+	val = readl(port->base + PCIE_APP_TLP_REQ);
+	val |= APP_CFG_REQ;
+	writel(val, port->base + PCIE_APP_TLP_REQ);
+
+	/* Check completion status */
+	return mtk_pcie_check_cfg_cpld(port);
+}
+
+static struct mtk_pcie_port *mtk_pcie_find_port(struct pci_bus *bus,
+						unsigned int devfn)
+{
+	struct mtk_pcie *pcie = bus->sysdata;
+	struct mtk_pcie_port *port;
+
+	list_for_each_entry(port, &pcie->ports, list)
+		if (port->slot == PCI_SLOT(devfn))
+			return port;
+
+	return NULL;
+}
+
+static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	struct mtk_pcie_port *port;
+	u32 bn = bus->number;
+	int ret;
+
+	port = mtk_pcie_find_port(bus, devfn);
+	if (!port) {
+		*val = ~0;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	ret = mtk_pcie_hw_rd_cfg(port, bn, devfn, where, size, val);
+	if (ret)
+		*val = ~0;
+
+	return ret;
+}
+
+static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct mtk_pcie_port *port;
+	u32 bn = bus->number;
+
+	port = mtk_pcie_find_port(bus, devfn);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return mtk_pcie_hw_wr_cfg(port, bn, devfn, where, size, val);
+}
+
+static struct pci_ops mtk_pcie_ops_v2 = {
+	.read  = mtk_pcie_config_read,
+	.write = mtk_pcie_config_write,
+};
+
+static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct resource *mem = &pcie->mem;
+	u32 val;
+	size_t size;
+	int err;
+
+	/* MT7622 platforms need to enable LTSSM and ASPM from PCIe subsys */
+	if (pcie->base) {
+		val = readl(pcie->base + PCIE_SYS_CFG_V2);
+		val |= PCIE_CSR_LTSSM_EN(port->slot) |
+		       PCIE_CSR_ASPM_L1_EN(port->slot);
+		writel(val, pcie->base + PCIE_SYS_CFG_V2);
+	}
+
+	/* Assert all reset signals */
+	writel(0, port->base + PCIE_RST_CTRL);
+
+	/*
+	 * Enable PCIe link down reset, if link status changed from link up to
+	 * link down, this will reset MAC control registers and configuration
+	 * space.
+	 */
+	writel(PCIE_LINKDOWN_RST_EN, port->base + PCIE_RST_CTRL);
+
+	/* De-assert PHY, PE, PIPE, MAC and configuration reset	*/
+	val = readl(port->base + PCIE_RST_CTRL);
+	val |= PCIE_PHY_RSTB | PCIE_PERSTB | PCIE_PIPE_SRSTB |
+	       PCIE_MAC_SRSTB | PCIE_CRSTB;
+	writel(val, port->base + PCIE_RST_CTRL);
+
+	/* 100ms timeout value should be enough for Gen1/2 training */
+	err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val,
+				 !!(val & PCIE_PORT_LINKUP_V2), 20,
+				 100 * USEC_PER_MSEC);
+	if (err)
+		return -ETIMEDOUT;
+
+	/* Set INTx mask */
+	val = readl(port->base + PCIE_INT_MASK);
+	val &= ~INTX_MASK;
+	writel(val, port->base + PCIE_INT_MASK);
+
+	/* Set AHB to PCIe translation windows */
+	size = mem->end - mem->start;
+	val = lower_32_bits(mem->start) | AHB2PCIE_SIZE(fls(size));
+	writel(val, port->base + PCIE_AHB_TRANS_BASE0_L);
+
+	val = upper_32_bits(mem->start);
+	writel(val, port->base + PCIE_AHB_TRANS_BASE0_H);
+
+	/* Set PCIe to AXI translation memory space.*/
+	val = fls(0xffffffff) | WIN_ENABLE;
+	writel(val, port->base + PCIE_AXI_WINDOW0);
+
+	return 0;
+}
+
+static int mtk_pcie_msi_alloc(struct mtk_pcie_port *port)
+{
+	int msi;
+
+	msi = find_first_zero_bit(port->msi_irq_in_use, MTK_MSI_IRQS_NUM);
+	if (msi < MTK_MSI_IRQS_NUM)
+		set_bit(msi, port->msi_irq_in_use);
+	else
+		return -ENOSPC;
+
+	return msi;
+}
+
+static void mtk_pcie_msi_free(struct mtk_pcie_port *port, unsigned long hwirq)
+{
+	clear_bit(hwirq, port->msi_irq_in_use);
+}
+
+static int mtk_pcie_msi_setup_irq(struct msi_controller *chip,
+				  struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct mtk_pcie_port *port;
+	struct msi_msg msg;
+	unsigned int irq;
+	int hwirq;
+	phys_addr_t msg_addr;
+
+	port = mtk_pcie_find_port(pdev->bus, pdev->devfn);
+	if (!port)
+		return -EINVAL;
+
+	hwirq = mtk_pcie_msi_alloc(port);
+	if (hwirq < 0)
+		return hwirq;
+
+	irq = irq_create_mapping(port->msi_domain, hwirq);
+	if (!irq) {
+		mtk_pcie_msi_free(port, hwirq);
+		return -EINVAL;
+	}
+
+	chip->dev = &pdev->dev;
+
+	irq_set_msi_desc(irq, desc);
+
+	/* MT2712/MT7622 only support 32-bit MSI addresses */
+	msg_addr = virt_to_phys(port->base + PCIE_MSI_VECTOR);
+	msg.address_hi = 0;
+	msg.address_lo = lower_32_bits(msg_addr);
+	msg.data = hwirq;
+
+	pci_write_msi_msg(irq, &msg);
+
+	return 0;
+}
+
+static void mtk_msi_teardown_irq(struct msi_controller *chip, unsigned int irq)
+{
+	struct pci_dev *pdev = to_pci_dev(chip->dev);
+	struct irq_data *d = irq_get_irq_data(irq);
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	struct mtk_pcie_port *port;
+
+	port = mtk_pcie_find_port(pdev->bus, pdev->devfn);
+	if (!port)
+		return;
+
+	irq_dispose_mapping(irq);
+	mtk_pcie_msi_free(port, hwirq);
+}
+
+static struct msi_controller mtk_pcie_msi_chip = {
+	.setup_irq = mtk_pcie_msi_setup_irq,
+	.teardown_irq = mtk_msi_teardown_irq,
+};
+
+static struct irq_chip mtk_msi_irq_chip = {
+	.name = "MTK PCIe MSI",
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
+};
+
+static int mtk_pcie_msi_map(struct irq_domain *domain, unsigned int irq,
+			    irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &mtk_msi_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.map = mtk_pcie_msi_map,
+};
+
+static void mtk_pcie_enable_msi(struct mtk_pcie_port *port)
+{
+	u32 val;
+	phys_addr_t msg_addr;
+
+	msg_addr = virt_to_phys(port->base + PCIE_MSI_VECTOR);
+	val = lower_32_bits(msg_addr);
+	writel(val, port->base + PCIE_IMSI_ADDR);
+
+	val = readl(port->base + PCIE_INT_MASK);
+	val &= ~MSI_MASK;
+	writel(val, port->base + PCIE_INT_MASK);
+}
+
+static int mtk_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+			     irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = mtk_pcie_intx_map,
+};
+
+static int mtk_pcie_init_irq_domain(struct mtk_pcie_port *port,
+				    struct device_node *node)
+{
+	struct device *dev = port->pcie->dev;
+	struct device_node *pcie_intc_node;
+
+	/* Setup INTx */
+	pcie_intc_node = of_get_next_child(node, NULL);
+	if (!pcie_intc_node) {
+		dev_err(dev, "no PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	port->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+						 &intx_domain_ops, port);
+	if (!port->irq_domain) {
+		dev_err(dev, "failed to get INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		port->msi_domain = irq_domain_add_linear(node, MTK_MSI_IRQS_NUM,
+							 &msi_domain_ops,
+							 &mtk_pcie_msi_chip);
+		if (!port->msi_domain) {
+			dev_err(dev, "failed to create MSI IRQ domain\n");
+			return -ENODEV;
+		}
+		mtk_pcie_enable_msi(port);
+	}
+
+	return 0;
+}
+
+static irqreturn_t mtk_pcie_intr_handler(int irq, void *data)
+{
+	struct mtk_pcie_port *port = (struct mtk_pcie_port *)data;
+	unsigned long status;
+	u32 virq;
+	u32 bit = INTX_SHIFT;
+
+	while ((status = readl(port->base + PCIE_INT_STATUS)) & INTX_MASK) {
+		for_each_set_bit_from(bit, &status, PCI_NUM_INTX + INTX_SHIFT) {
+			/* Clear the INTx */
+			writel(1 << bit, port->base + PCIE_INT_STATUS);
+			virq = irq_find_mapping(port->irq_domain,
+						bit - INTX_SHIFT);
+			generic_handle_irq(virq);
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		while ((status = readl(port->base + PCIE_INT_STATUS)) & MSI_STATUS) {
+			unsigned long imsi_status;
+
+			while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) {
+				for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM) {
+					/* Clear the MSI */
+					writel(1 << bit, port->base + PCIE_IMSI_STATUS);
+					virq = irq_find_mapping(port->msi_domain, bit);
+					generic_handle_irq(virq);
+				}
+			}
+			/* Clear MSI interrupt status */
+			writel(MSI_STATUS, port->base + PCIE_INT_STATUS);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mtk_pcie_setup_irq(struct mtk_pcie_port *port,
+			      struct device_node *node)
+{
+	struct mtk_pcie *pcie = port->pcie;
+	struct device *dev = pcie->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int err, irq;
+
+	irq = platform_get_irq(pdev, port->slot);
+	err = devm_request_irq(dev, irq, mtk_pcie_intr_handler,
+			       IRQF_SHARED, "mtk-pcie", port);
+	if (err) {
+		dev_err(dev, "unable to request IRQ %d\n", irq);
+		return err;
+	}
+
+	err = mtk_pcie_init_irq_domain(port, node);
+	if (err) {
+		dev_err(dev, "failed to init PCIe IRQ domain\n");
+		return err;
+	}
+
+	return 0;
+}
+
 static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus,
 				      unsigned int devfn, int where)
 {
-	struct pci_host_bridge *host = pci_find_host_bridge(bus);
-	struct mtk_pcie *pcie = pci_host_bridge_priv(host);
+	struct mtk_pcie *pcie = bus->sysdata;
 
 	writel(PCIE_CONF_ADDR(where, PCI_FUNC(devfn), PCI_SLOT(devfn),
 			      bus->number), pcie->base + PCIE_CFG_ADDR);
@@ -171,16 +675,34 @@ static struct pci_ops mtk_pcie_ops = {
 	.write = pci_generic_config_write,
 };
 
-static void mtk_pcie_configure_rc(struct mtk_pcie_port *port)
+static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
 {
 	struct mtk_pcie *pcie = port->pcie;
-	u32 func = PCI_FUNC(port->index << 3);
-	u32 slot = PCI_SLOT(port->index << 3);
+	u32 func = PCI_FUNC(port->slot << 3);
+	u32 slot = PCI_SLOT(port->slot << 3);
 	u32 val;
+	int err;
+
+	/* assert port PERST_N */
+	val = readl(pcie->base + PCIE_SYS_CFG);
+	val |= PCIE_PORT_PERST(port->slot);
+	writel(val, pcie->base + PCIE_SYS_CFG);
+
+	/* de-assert port PERST_N */
+	val = readl(pcie->base + PCIE_SYS_CFG);
+	val &= ~PCIE_PORT_PERST(port->slot);
+	writel(val, pcie->base + PCIE_SYS_CFG);
+
+	/* 100ms timeout value should be enough for Gen1/2 training */
+	err = readl_poll_timeout(port->base + PCIE_LINK_STATUS, val,
+				 !!(val & PCIE_PORT_LINKUP), 20,
+				 100 * USEC_PER_MSEC);
+	if (err)
+		return -ETIMEDOUT;
 
 	/* enable interrupt */
 	val = readl(pcie->base + PCIE_INT_ENABLE);
-	val |= PCIE_PORT_INT_EN(port->index);
+	val |= PCIE_PORT_INT_EN(port->slot);
 	writel(val, pcie->base + PCIE_INT_ENABLE);
 
 	/* map to all DDR region. We need to set it before cfg operation. */
@@ -209,67 +731,94 @@ static void mtk_pcie_configure_rc(struct mtk_pcie_port *port)
 	writel(PCIE_CONF_ADDR(PCIE_FTS_NUM, func, slot, 0),
 	       pcie->base + PCIE_CFG_ADDR);
 	writel(val, pcie->base + PCIE_CFG_DATA);
+
+	return 0;
 }
 
-static void mtk_pcie_assert_ports(struct mtk_pcie_port *port)
+static void mtk_pcie_enable_port(struct mtk_pcie_port *port)
 {
 	struct mtk_pcie *pcie = port->pcie;
-	u32 val;
-
-	/* assert port PERST_N */
-	val = readl(pcie->base + PCIE_SYS_CFG);
-	val |= PCIE_PORT_PERST(port->index);
-	writel(val, pcie->base + PCIE_SYS_CFG);
-
-	/* de-assert port PERST_N */
-	val = readl(pcie->base + PCIE_SYS_CFG);
-	val &= ~PCIE_PORT_PERST(port->index);
-	writel(val, pcie->base + PCIE_SYS_CFG);
-
-	/* PCIe v2.0 need at least 100ms delay to train from Gen1 to Gen2 */
-	msleep(100);
-}
-
-static void mtk_pcie_enable_ports(struct mtk_pcie_port *port)
-{
-	struct device *dev = port->pcie->dev;
+	struct device *dev = pcie->dev;
 	int err;
 
 	err = clk_prepare_enable(port->sys_ck);
 	if (err) {
-		dev_err(dev, "failed to enable port%d clock\n", port->index);
+		dev_err(dev, "failed to enable sys_ck%d clock\n", port->slot);
 		goto err_sys_clk;
 	}
 
+	err = clk_prepare_enable(port->ahb_ck);
+	if (err) {
+		dev_err(dev, "failed to enable ahb_ck%d\n", port->slot);
+		goto err_ahb_clk;
+	}
+
+	err = clk_prepare_enable(port->aux_ck);
+	if (err) {
+		dev_err(dev, "failed to enable aux_ck%d\n", port->slot);
+		goto err_aux_clk;
+	}
+
+	err = clk_prepare_enable(port->axi_ck);
+	if (err) {
+		dev_err(dev, "failed to enable axi_ck%d\n", port->slot);
+		goto err_axi_clk;
+	}
+
+	err = clk_prepare_enable(port->obff_ck);
+	if (err) {
+		dev_err(dev, "failed to enable obff_ck%d\n", port->slot);
+		goto err_obff_clk;
+	}
+
+	err = clk_prepare_enable(port->pipe_ck);
+	if (err) {
+		dev_err(dev, "failed to enable pipe_ck%d\n", port->slot);
+		goto err_pipe_clk;
+	}
+
 	reset_control_assert(port->reset);
 	reset_control_deassert(port->reset);
 
+	err = phy_init(port->phy);
+	if (err) {
+		dev_err(dev, "failed to initialize port%d phy\n", port->slot);
+		goto err_phy_init;
+	}
+
 	err = phy_power_on(port->phy);
 	if (err) {
-		dev_err(dev, "failed to power on port%d phy\n", port->index);
+		dev_err(dev, "failed to power on port%d phy\n", port->slot);
 		goto err_phy_on;
 	}
 
-	mtk_pcie_assert_ports(port);
-
-	/* if link up, then setup root port configuration space */
-	if (mtk_pcie_link_up(port)) {
-		mtk_pcie_configure_rc(port);
+	if (!pcie->soc->startup(port))
 		return;
-	}
 
-	dev_info(dev, "Port%d link down\n", port->index);
+	dev_info(dev, "Port%d link down\n", port->slot);
 
 	phy_power_off(port->phy);
 err_phy_on:
+	phy_exit(port->phy);
+err_phy_init:
+	clk_disable_unprepare(port->pipe_ck);
+err_pipe_clk:
+	clk_disable_unprepare(port->obff_ck);
+err_obff_clk:
+	clk_disable_unprepare(port->axi_ck);
+err_axi_clk:
+	clk_disable_unprepare(port->aux_ck);
+err_aux_clk:
+	clk_disable_unprepare(port->ahb_ck);
+err_ahb_clk:
 	clk_disable_unprepare(port->sys_ck);
 err_sys_clk:
 	mtk_pcie_port_free(port);
 }
 
-static int mtk_pcie_parse_ports(struct mtk_pcie *pcie,
-				struct device_node *node,
-				int index)
+static int mtk_pcie_parse_port(struct mtk_pcie *pcie,
+			       struct device_node *node,
+			       int slot)
 {
 	struct mtk_pcie_port *port;
 	struct resource *regs;
@@ -288,34 +837,87 @@ static int mtk_pcie_parse_ports(struct mtk_pcie *pcie,
 		return err;
 	}
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
+	snprintf(name, sizeof(name), "port%d", slot);
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
 	port->base = devm_ioremap_resource(dev, regs);
 	if (IS_ERR(port->base)) {
-		dev_err(dev, "failed to map port%d base\n", index);
+		dev_err(dev, "failed to map port%d base\n", slot);
 		return PTR_ERR(port->base);
 	}
 
-	snprintf(name, sizeof(name), "sys_ck%d", index);
+	snprintf(name, sizeof(name), "sys_ck%d", slot);
 	port->sys_ck = devm_clk_get(dev, name);
 	if (IS_ERR(port->sys_ck)) {
-		dev_err(dev, "failed to get port%d clock\n", index);
+		dev_err(dev, "failed to get sys_ck%d clock\n", slot);
 		return PTR_ERR(port->sys_ck);
 	}
 
-	snprintf(name, sizeof(name), "pcie-rst%d", index);
-	port->reset = devm_reset_control_get_optional(dev, name);
+	/* sys_ck might be divided into the following parts in some chips */
+	snprintf(name, sizeof(name), "ahb_ck%d", slot);
+	port->ahb_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->ahb_ck)) {
+		if (PTR_ERR(port->ahb_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		port->ahb_ck = NULL;
+	}
+
+	snprintf(name, sizeof(name), "axi_ck%d", slot);
+	port->axi_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->axi_ck)) {
+		if (PTR_ERR(port->axi_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		port->axi_ck = NULL;
+	}
+
+	snprintf(name, sizeof(name), "aux_ck%d", slot);
+	port->aux_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->aux_ck)) {
+		if (PTR_ERR(port->aux_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		port->aux_ck = NULL;
+	}
+
+	snprintf(name, sizeof(name), "obff_ck%d", slot);
+	port->obff_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->obff_ck)) {
+		if (PTR_ERR(port->obff_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		port->obff_ck = NULL;
+	}
+
+	snprintf(name, sizeof(name), "pipe_ck%d", slot);
+	port->pipe_ck = devm_clk_get(dev, name);
+	if (IS_ERR(port->pipe_ck)) {
+		if (PTR_ERR(port->pipe_ck) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		port->pipe_ck = NULL;
+	}
+
+	snprintf(name, sizeof(name), "pcie-rst%d", slot);
+	port->reset = devm_reset_control_get_optional_exclusive(dev, name);
 	if (PTR_ERR(port->reset) == -EPROBE_DEFER)
 		return PTR_ERR(port->reset);
 
 	/* some platforms may use default PHY setting */
-	snprintf(name, sizeof(name), "pcie-phy%d", index);
+	snprintf(name, sizeof(name), "pcie-phy%d", slot);
 	port->phy = devm_phy_optional_get(dev, name);
 	if (IS_ERR(port->phy))
 		return PTR_ERR(port->phy);
 
-	port->index = index;
+	port->slot = slot;
 	port->pcie = pcie;
 
+	if (pcie->soc->setup_irq) {
+		err = pcie->soc->setup_irq(port, node);
+		if (err)
+			return err;
+	}
+
 	INIT_LIST_HEAD(&port->list);
 	list_add_tail(&port->list, &pcie->ports);
 
@@ -329,12 +931,14 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie)
 	struct resource *regs;
 	int err;
 
-	/* get shared registers */
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pcie->base = devm_ioremap_resource(dev, regs);
-	if (IS_ERR(pcie->base)) {
-		dev_err(dev, "failed to map shared register\n");
-		return PTR_ERR(pcie->base);
+	/* get shared registers, which are optional */
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "subsys");
+	if (regs) {
+		pcie->base = devm_ioremap_resource(dev, regs);
+		if (IS_ERR(pcie->base)) {
+			dev_err(dev, "failed to map shared register\n");
+			return PTR_ERR(pcie->base);
+		}
 	}
 
 	pcie->free_ck = devm_clk_get(dev, "free_ck");
@@ -422,7 +1026,7 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
 	}
 
 	for_each_available_child_of_node(node, child) {
-		int index;
+		int slot;
 
 		err = of_pci_get_devfn(child);
 		if (err < 0) {
@@ -430,9 +1034,9 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
 			return err;
 		}
 
-		index = PCI_SLOT(err);
+		slot = PCI_SLOT(err);
 
-		err = mtk_pcie_parse_ports(pcie, child, index);
+		err = mtk_pcie_parse_port(pcie, child, slot);
 		if (err)
 			return err;
 	}
@@ -443,7 +1047,7 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
 
 	/* enable each port, and then check link status */
 	list_for_each_entry_safe(port, tmp, &pcie->ports, list)
-		mtk_pcie_enable_ports(port);
+		mtk_pcie_enable_port(port);
 
 	/* power down PCIe subsys if slots are all empty (link down) */
 	if (list_empty(&pcie->ports))
@@ -480,9 +1084,12 @@ static int mtk_pcie_register_host(struct pci_host_bridge *host)
 
 	host->busnr = pcie->busn.start;
 	host->dev.parent = pcie->dev;
-	host->ops = &mtk_pcie_ops;
+	host->ops = pcie->soc->ops;
 	host->map_irq = of_irq_parse_and_map_pci;
 	host->swizzle_irq = pci_common_swizzle;
+	host->sysdata = pcie;
+	if (IS_ENABLED(CONFIG_PCI_MSI) && pcie->soc->has_msi)
+		host->msi = &mtk_pcie_msi_chip;
 
 	err = pci_scan_root_bus_bridge(host);
 	if (err < 0)
@@ -513,6 +1120,7 @@ static int mtk_pcie_probe(struct platform_device *pdev)
 	pcie = pci_host_bridge_priv(host);
 
 	pcie->dev = dev;
+	pcie->soc = of_device_get_match_data(dev);
 	platform_set_drvdata(pdev, pcie);
 	INIT_LIST_HEAD(&pcie->ports);
 
@@ -537,9 +1145,23 @@ static int mtk_pcie_probe(struct platform_device *pdev)
 	return err;
 }
 
+static const struct mtk_pcie_soc mtk_pcie_soc_v1 = {
+	.ops = &mtk_pcie_ops,
+	.startup = mtk_pcie_startup_port,
+};
+
+static const struct mtk_pcie_soc mtk_pcie_soc_v2 = {
+	.has_msi = true,
+	.ops = &mtk_pcie_ops_v2,
+	.startup = mtk_pcie_startup_port_v2,
+	.setup_irq = mtk_pcie_setup_irq,
+};
+
 static const struct of_device_id mtk_pcie_ids[] = {
-	{ .compatible = "mediatek,mt7623-pcie"},
-	{ .compatible = "mediatek,mt2701-pcie"},
+	{ .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 },
+	{ .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 },
+	{ .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_v2 },
+	{ .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_v2 },
 	{},
 };
 
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 246d485..4e0b25d 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -471,10 +471,8 @@ static int rcar_pcie_enable(struct rcar_pcie *pcie)
 		bridge->msi = &pcie->msi.chip;
 
 	ret = pci_scan_root_bus_bridge(bridge);
-	if (ret < 0) {
-		kfree(bridge);
+	if (ret < 0)
 		return ret;
-	}
 
 	bus = bridge->bus;
 
@@ -1190,14 +1188,16 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_free_bridge:
-	pci_free_host_bridge(bridge);
-
 err_pm_put:
 	pm_runtime_put(dev);
 
 err_pm_disable:
 	pm_runtime_disable(dev);
+
+err_free_bridge:
+	pci_free_host_bridge(bridge);
+	pci_free_resource_list(&pcie->resources);
+
 	return err;
 }
 
diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 7bb9870..9051c6c 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -6,7 +6,7 @@
  * Author: Shawn Lin <shawn.lin@rock-chips.com>
  *         Wenrui Li <wenrui.li@rock-chips.com>
  *
- * Bits taken from Synopsys Designware Host controller driver and
+ * Bits taken from Synopsys DesignWare Host controller driver and
  * ARM PCI Host generic driver.
  *
  * This program is free software: you can redistribute it and/or modify
@@ -15,6 +15,7 @@
  * (at your option) any later version.
  */
 
+#include <linux/bitrev.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
@@ -47,6 +48,7 @@
 #define HIWORD_UPDATE_BIT(val)		HIWORD_UPDATE(val, val)
 
 #define ENCODE_LANES(x)			((((x) >> 1) & 3) << 4)
+#define MAX_LANE_NUM			4
 
 #define PCIE_CLIENT_BASE		0x0
 #define PCIE_CLIENT_CONFIG		(PCIE_CLIENT_BASE + 0x00)
@@ -111,6 +113,9 @@
 #define   PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT	16
 #define   PCIE_CORE_TXCREDIT_CFG1_MUI_ENCODE(x) \
 		(((x) >> 3) << PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT)
+#define PCIE_CORE_LANE_MAP             (PCIE_CORE_CTRL_MGMT_BASE + 0x200)
+#define   PCIE_CORE_LANE_MAP_MASK              0x0000000f
+#define   PCIE_CORE_LANE_MAP_REVERSE           BIT(16)
 #define PCIE_CORE_INT_STATUS		(PCIE_CORE_CTRL_MGMT_BASE + 0x20c)
 #define   PCIE_CORE_INT_PRFPE			BIT(0)
 #define   PCIE_CORE_INT_CRFPE			BIT(1)
@@ -210,7 +215,8 @@
 struct rockchip_pcie {
 	void	__iomem *reg_base;		/* DT axi-base */
 	void	__iomem *apb_base;		/* DT apb-base */
-	struct	phy *phy;
+	bool    legacy_phy;
+	struct  phy *phys[MAX_LANE_NUM];
 	struct	reset_control *core_rst;
 	struct	reset_control *mgmt_rst;
 	struct	reset_control *mgmt_sticky_rst;
@@ -222,11 +228,13 @@ struct rockchip_pcie {
 	struct	clk *aclk_perf_pcie;
 	struct	clk *hclk_pcie;
 	struct	clk *clk_pcie_pm;
+	struct	regulator *vpcie12v; /* 12V power supply */
 	struct	regulator *vpcie3v3; /* 3.3V power supply */
 	struct	regulator *vpcie1v8; /* 1.8V power supply */
 	struct	regulator *vpcie0v9; /* 0.9V power supply */
 	struct	gpio_desc *ep_gpio;
 	u32	lanes;
+	u8      lanes_map;
 	u8	root_bus_nr;
 	int	link_gen;
 	struct	device *dev;
@@ -299,6 +307,24 @@ static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip,
 	return 1;
 }
 
+static u8 rockchip_pcie_lane_map(struct rockchip_pcie *rockchip)
+{
+	u32 val;
+	u8 map;
+
+	if (rockchip->legacy_phy)
+		return GENMASK(MAX_LANE_NUM - 1, 0);
+
+	val = rockchip_pcie_read(rockchip, PCIE_CORE_LANE_MAP);
+	map = val & PCIE_CORE_LANE_MAP_MASK;
+
+	/* The link may be using a reverse-indexed mapping. */
+	if (val & PCIE_CORE_LANE_MAP_REVERSE)
+		map = bitrev8(map) >> 4;
+
+	return map;
+}
+
 static int rockchip_pcie_rd_own_conf(struct rockchip_pcie *rockchip,
 				     int where, int size, u32 *val)
 {
@@ -514,10 +540,10 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
 static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 {
 	struct device *dev = rockchip->dev;
-	int err;
+	int err, i;
 	u32 status;
 
-	gpiod_set_value(rockchip->ep_gpio, 0);
+	gpiod_set_value_cansleep(rockchip->ep_gpio, 0);
 
 	err = reset_control_assert(rockchip->aclk_rst);
 	if (err) {
@@ -537,34 +563,36 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 		return err;
 	}
 
-	err = phy_init(rockchip->phy);
-	if (err < 0) {
-		dev_err(dev, "fail to init phy, err %d\n", err);
-		return err;
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		err = phy_init(rockchip->phys[i]);
+		if (err) {
+			dev_err(dev, "init phy%d err %d\n", i, err);
+			goto err_exit_phy;
+		}
 	}
 
 	err = reset_control_assert(rockchip->core_rst);
 	if (err) {
 		dev_err(dev, "assert core_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	err = reset_control_assert(rockchip->mgmt_rst);
 	if (err) {
 		dev_err(dev, "assert mgmt_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	err = reset_control_assert(rockchip->mgmt_sticky_rst);
 	if (err) {
 		dev_err(dev, "assert mgmt_sticky_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	err = reset_control_assert(rockchip->pipe_rst);
 	if (err) {
 		dev_err(dev, "assert pipe_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	udelay(10);
@@ -572,19 +600,19 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 	err = reset_control_deassert(rockchip->pm_rst);
 	if (err) {
 		dev_err(dev, "deassert pm_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	err = reset_control_deassert(rockchip->aclk_rst);
 	if (err) {
 		dev_err(dev, "deassert aclk_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	err = reset_control_deassert(rockchip->pclk_rst);
 	if (err) {
 		dev_err(dev, "deassert pclk_rst err %d\n", err);
-		return err;
+		goto err_exit_phy;
 	}
 
 	if (rockchip->link_gen == 2)
@@ -602,10 +630,12 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 			    PCIE_CLIENT_MODE_RC,
 			    PCIE_CLIENT_CONFIG);
 
-	err = phy_power_on(rockchip->phy);
-	if (err) {
-		dev_err(dev, "fail to power on phy, err %d\n", err);
-		return err;
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		err = phy_power_on(rockchip->phys[i]);
+		if (err) {
+			dev_err(dev, "power on phy%d err %d\n", i, err);
+			goto err_power_off_phy;
+		}
 	}
 
 	/*
@@ -615,25 +645,25 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 	err = reset_control_deassert(rockchip->mgmt_sticky_rst);
 	if (err) {
 		dev_err(dev, "deassert mgmt_sticky_rst err %d\n", err);
-		return err;
+		goto err_power_off_phy;
 	}
 
 	err = reset_control_deassert(rockchip->core_rst);
 	if (err) {
 		dev_err(dev, "deassert core_rst err %d\n", err);
-		return err;
+		goto err_power_off_phy;
 	}
 
 	err = reset_control_deassert(rockchip->mgmt_rst);
 	if (err) {
 		dev_err(dev, "deassert mgmt_rst err %d\n", err);
-		return err;
+		goto err_power_off_phy;
 	}
 
 	err = reset_control_deassert(rockchip->pipe_rst);
 	if (err) {
 		dev_err(dev, "deassert pipe_rst err %d\n", err);
-		return err;
+		goto err_power_off_phy;
 	}
 
 	/* Fix the transmitted FTS count desired to exit from L0s. */
@@ -658,7 +688,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 	rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE,
 			    PCIE_CLIENT_CONFIG);
 
-	gpiod_set_value(rockchip->ep_gpio, 1);
+	gpiod_set_value_cansleep(rockchip->ep_gpio, 1);
 
 	/* 500ms timeout value should be enough for Gen1/2 training */
 	err = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1,
@@ -666,7 +696,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 				 500 * USEC_PER_MSEC);
 	if (err) {
 		dev_err(dev, "PCIe link training gen1 timeout!\n");
-		return -ETIMEDOUT;
+		goto err_power_off_phy;
 	}
 
 	if (rockchip->link_gen == 2) {
@@ -691,6 +721,15 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 			  PCIE_CORE_PL_CONF_LANE_SHIFT);
 	dev_dbg(dev, "current link width is x%d\n", status);
 
+	/* Power off unused lane(s) */
+	rockchip->lanes_map = rockchip_pcie_lane_map(rockchip);
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		if (!(rockchip->lanes_map & BIT(i))) {
+			dev_dbg(dev, "idling lane %d\n", i);
+			phy_power_off(rockchip->phys[i]);
+		}
+	}
+
 	rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID,
 			    PCIE_CORE_CONFIG_VENDOR);
 	rockchip_pcie_write(rockchip,
@@ -715,6 +754,26 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 	rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCSR);
 
 	return 0;
+err_power_off_phy:
+	while (i--)
+		phy_power_off(rockchip->phys[i]);
+	i = MAX_LANE_NUM;
+err_exit_phy:
+	while (i--)
+		phy_exit(rockchip->phys[i]);
+	return err;
+}
+
+static void rockchip_pcie_deinit_phys(struct rockchip_pcie *rockchip)
+{
+	int i;
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		/* inactive lanes are already powered off */
+		if (rockchip->lanes_map & BIT(i))
+			phy_power_off(rockchip->phys[i]);
+		phy_exit(rockchip->phys[i]);
+	}
 }
 
 static irqreturn_t rockchip_pcie_subsys_irq_handler(int irq, void *arg)
@@ -853,6 +912,91 @@ static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+static int rockchip_pcie_get_phys(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	struct phy *phy;
+	char *name;
+	u32 i;
+
+	phy = devm_phy_get(dev, "pcie-phy");
+	if (!IS_ERR(phy)) {
+		rockchip->legacy_phy = true;
+		rockchip->phys[0] = phy;
+		dev_warn(dev, "legacy phy model is deprecated!\n");
+		return 0;
+	}
+
+	if (PTR_ERR(phy) == -EPROBE_DEFER)
+		return PTR_ERR(phy);
+
+	dev_dbg(dev, "missing legacy phy; search for per-lane PHY\n");
+
+	for (i = 0; i < MAX_LANE_NUM; i++) {
+		name = kasprintf(GFP_KERNEL, "pcie-phy-%u", i);
+		if (!name)
+			return -ENOMEM;
+
+		phy = devm_of_phy_get(dev, dev->of_node, name);
+		kfree(name);
+
+		if (IS_ERR(phy)) {
+			if (PTR_ERR(phy) != -EPROBE_DEFER)
+				dev_err(dev, "missing phy for lane %d: %ld\n",
+					i, PTR_ERR(phy));
+			return PTR_ERR(phy);
+		}
+
+		rockchip->phys[i] = phy;
+	}
+
+	return 0;
+}
+
+static int rockchip_pcie_setup_irq(struct rockchip_pcie *rockchip)
+{
+	int irq, err;
+	struct device *dev = rockchip->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	irq = platform_get_irq_byname(pdev, "sys");
+	if (irq < 0) {
+		dev_err(dev, "missing sys IRQ resource\n");
+		return irq;
+	}
+
+	err = devm_request_irq(dev, irq, rockchip_pcie_subsys_irq_handler,
+			       IRQF_SHARED, "pcie-sys", rockchip);
+	if (err) {
+		dev_err(dev, "failed to request PCIe subsystem IRQ\n");
+		return err;
+	}
+
+	irq = platform_get_irq_byname(pdev, "legacy");
+	if (irq < 0) {
+		dev_err(dev, "missing legacy IRQ resource\n");
+		return irq;
+	}
+
+	irq_set_chained_handler_and_data(irq,
+					 rockchip_pcie_legacy_int_handler,
+					 rockchip);
+
+	irq = platform_get_irq_byname(pdev, "client");
+	if (irq < 0) {
+		dev_err(dev, "missing client IRQ resource\n");
+		return irq;
+	}
+
+	err = devm_request_irq(dev, irq, rockchip_pcie_client_irq_handler,
+			       IRQF_SHARED, "pcie-client", rockchip);
+	if (err) {
+		dev_err(dev, "failed to request PCIe client IRQ\n");
+		return err;
+	}
+
+	return 0;
+}
 
 /**
  * rockchip_pcie_parse_dt - Parse Device Tree
@@ -866,7 +1010,6 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *node = dev->of_node;
 	struct resource *regs;
-	int irq;
 	int err;
 
 	regs = platform_get_resource_byname(pdev,
@@ -883,12 +1026,9 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
 	if (IS_ERR(rockchip->apb_base))
 		return PTR_ERR(rockchip->apb_base);
 
-	rockchip->phy = devm_phy_get(dev, "pcie-phy");
-	if (IS_ERR(rockchip->phy)) {
-		if (PTR_ERR(rockchip->phy) != -EPROBE_DEFER)
-			dev_err(dev, "missing phy\n");
-		return PTR_ERR(rockchip->phy);
-	}
+	err = rockchip_pcie_get_phys(rockchip);
+	if (err)
+		return err;
 
 	rockchip->lanes = 1;
 	err = of_property_read_u32(node, "num-lanes", &rockchip->lanes);
@@ -903,49 +1043,50 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
 	if (rockchip->link_gen < 0 || rockchip->link_gen > 2)
 		rockchip->link_gen = 2;
 
-	rockchip->core_rst = devm_reset_control_get(dev, "core");
+	rockchip->core_rst = devm_reset_control_get_exclusive(dev, "core");
 	if (IS_ERR(rockchip->core_rst)) {
 		if (PTR_ERR(rockchip->core_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing core reset property in node\n");
 		return PTR_ERR(rockchip->core_rst);
 	}
 
-	rockchip->mgmt_rst = devm_reset_control_get(dev, "mgmt");
+	rockchip->mgmt_rst = devm_reset_control_get_exclusive(dev, "mgmt");
 	if (IS_ERR(rockchip->mgmt_rst)) {
 		if (PTR_ERR(rockchip->mgmt_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing mgmt reset property in node\n");
 		return PTR_ERR(rockchip->mgmt_rst);
 	}
 
-	rockchip->mgmt_sticky_rst = devm_reset_control_get(dev, "mgmt-sticky");
+	rockchip->mgmt_sticky_rst = devm_reset_control_get_exclusive(dev,
+								     "mgmt-sticky");
 	if (IS_ERR(rockchip->mgmt_sticky_rst)) {
 		if (PTR_ERR(rockchip->mgmt_sticky_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing mgmt-sticky reset property in node\n");
 		return PTR_ERR(rockchip->mgmt_sticky_rst);
 	}
 
-	rockchip->pipe_rst = devm_reset_control_get(dev, "pipe");
+	rockchip->pipe_rst = devm_reset_control_get_exclusive(dev, "pipe");
 	if (IS_ERR(rockchip->pipe_rst)) {
 		if (PTR_ERR(rockchip->pipe_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing pipe reset property in node\n");
 		return PTR_ERR(rockchip->pipe_rst);
 	}
 
-	rockchip->pm_rst = devm_reset_control_get(dev, "pm");
+	rockchip->pm_rst = devm_reset_control_get_exclusive(dev, "pm");
 	if (IS_ERR(rockchip->pm_rst)) {
 		if (PTR_ERR(rockchip->pm_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing pm reset property in node\n");
 		return PTR_ERR(rockchip->pm_rst);
 	}
 
-	rockchip->pclk_rst = devm_reset_control_get(dev, "pclk");
+	rockchip->pclk_rst = devm_reset_control_get_exclusive(dev, "pclk");
 	if (IS_ERR(rockchip->pclk_rst)) {
 		if (PTR_ERR(rockchip->pclk_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing pclk reset property in node\n");
 		return PTR_ERR(rockchip->pclk_rst);
 	}
 
-	rockchip->aclk_rst = devm_reset_control_get(dev, "aclk");
+	rockchip->aclk_rst = devm_reset_control_get_exclusive(dev, "aclk");
 	if (IS_ERR(rockchip->aclk_rst)) {
 		if (PTR_ERR(rockchip->aclk_rst) != -EPROBE_DEFER)
 			dev_err(dev, "missing aclk reset property in node\n");
@@ -982,40 +1123,15 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
 		return PTR_ERR(rockchip->clk_pcie_pm);
 	}
 
-	irq = platform_get_irq_byname(pdev, "sys");
-	if (irq < 0) {
-		dev_err(dev, "missing sys IRQ resource\n");
-		return -EINVAL;
-	}
-
-	err = devm_request_irq(dev, irq, rockchip_pcie_subsys_irq_handler,
-			       IRQF_SHARED, "pcie-sys", rockchip);
-	if (err) {
-		dev_err(dev, "failed to request PCIe subsystem IRQ\n");
+	err = rockchip_pcie_setup_irq(rockchip);
+	if (err)
 		return err;
-	}
 
-	irq = platform_get_irq_byname(pdev, "legacy");
-	if (irq < 0) {
-		dev_err(dev, "missing legacy IRQ resource\n");
-		return -EINVAL;
-	}
-
-	irq_set_chained_handler_and_data(irq,
-					 rockchip_pcie_legacy_int_handler,
-					 rockchip);
-
-	irq = platform_get_irq_byname(pdev, "client");
-	if (irq < 0) {
-		dev_err(dev, "missing client IRQ resource\n");
-		return -EINVAL;
-	}
-
-	err = devm_request_irq(dev, irq, rockchip_pcie_client_irq_handler,
-			       IRQF_SHARED, "pcie-client", rockchip);
-	if (err) {
-		dev_err(dev, "failed to request PCIe client IRQ\n");
-		return err;
+	rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v");
+	if (IS_ERR(rockchip->vpcie12v)) {
+		if (PTR_ERR(rockchip->vpcie12v) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "no vpcie12v regulator found\n");
 	}
 
 	rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
@@ -1047,11 +1163,19 @@ static int rockchip_pcie_set_vpcie(struct rockchip_pcie *rockchip)
 	struct device *dev = rockchip->dev;
 	int err;
 
+	if (!IS_ERR(rockchip->vpcie12v)) {
+		err = regulator_enable(rockchip->vpcie12v);
+		if (err) {
+			dev_err(dev, "fail to enable vpcie12v regulator\n");
+			goto err_out;
+		}
+	}
+
 	if (!IS_ERR(rockchip->vpcie3v3)) {
 		err = regulator_enable(rockchip->vpcie3v3);
 		if (err) {
 			dev_err(dev, "fail to enable vpcie3v3 regulator\n");
-			goto err_out;
+			goto err_disable_12v;
 		}
 	}
 
@@ -1079,6 +1203,9 @@ static int rockchip_pcie_set_vpcie(struct rockchip_pcie *rockchip)
 err_disable_3v3:
 	if (!IS_ERR(rockchip->vpcie3v3))
 		regulator_disable(rockchip->vpcie3v3);
+err_disable_12v:
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
 err_out:
 	return err;
 }
@@ -1116,7 +1243,7 @@ static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip)
 		return -EINVAL;
 	}
 
-	rockchip->irq_domain = irq_domain_add_linear(intc, 4,
+	rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX,
 						    &intx_domain_ops, rockchip);
 	if (!rockchip->irq_domain) {
 		dev_err(dev, "failed to get a INTx IRQ domain\n");
@@ -1270,6 +1397,56 @@ static int rockchip_pcie_wait_l2(struct rockchip_pcie *rockchip)
 	return 0;
 }
 
+static int rockchip_pcie_enable_clocks(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->dev;
+	int err;
+
+	err = clk_prepare_enable(rockchip->aclk_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable aclk_pcie clock\n");
+		return err;
+	}
+
+	err = clk_prepare_enable(rockchip->aclk_perf_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable aclk_perf_pcie clock\n");
+		goto err_aclk_perf_pcie;
+	}
+
+	err = clk_prepare_enable(rockchip->hclk_pcie);
+	if (err) {
+		dev_err(dev, "unable to enable hclk_pcie clock\n");
+		goto err_hclk_pcie;
+	}
+
+	err = clk_prepare_enable(rockchip->clk_pcie_pm);
+	if (err) {
+		dev_err(dev, "unable to enable clk_pcie_pm clock\n");
+		goto err_clk_pcie_pm;
+	}
+
+	return 0;
+
+err_clk_pcie_pm:
+	clk_disable_unprepare(rockchip->hclk_pcie);
+err_hclk_pcie:
+	clk_disable_unprepare(rockchip->aclk_perf_pcie);
+err_aclk_perf_pcie:
+	clk_disable_unprepare(rockchip->aclk_pcie);
+	return err;
+}
+
+static void rockchip_pcie_disable_clocks(void *data)
+{
+	struct rockchip_pcie *rockchip = data;
+
+	clk_disable_unprepare(rockchip->clk_pcie_pm);
+	clk_disable_unprepare(rockchip->hclk_pcie);
+	clk_disable_unprepare(rockchip->aclk_perf_pcie);
+	clk_disable_unprepare(rockchip->aclk_pcie);
+}
+
 static int __maybe_unused rockchip_pcie_suspend_noirq(struct device *dev)
 {
 	struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
@@ -1286,13 +1463,9 @@ static int __maybe_unused rockchip_pcie_suspend_noirq(struct device *dev)
 		return ret;
 	}
 
-	phy_power_off(rockchip->phy);
-	phy_exit(rockchip->phy);
+	rockchip_pcie_deinit_phys(rockchip);
 
-	clk_disable_unprepare(rockchip->clk_pcie_pm);
-	clk_disable_unprepare(rockchip->hclk_pcie);
-	clk_disable_unprepare(rockchip->aclk_perf_pcie);
-	clk_disable_unprepare(rockchip->aclk_pcie);
+	rockchip_pcie_disable_clocks(rockchip);
 
 	if (!IS_ERR(rockchip->vpcie0v9))
 		regulator_disable(rockchip->vpcie0v9);
@@ -1313,21 +1486,9 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev)
 		}
 	}
 
-	err = clk_prepare_enable(rockchip->clk_pcie_pm);
+	err = rockchip_pcie_enable_clocks(rockchip);
 	if (err)
-		goto err_pcie_pm;
-
-	err = clk_prepare_enable(rockchip->hclk_pcie);
-	if (err)
-		goto err_hclk_pcie;
-
-	err = clk_prepare_enable(rockchip->aclk_perf_pcie);
-	if (err)
-		goto err_aclk_perf_pcie;
-
-	err = clk_prepare_enable(rockchip->aclk_pcie);
-	if (err)
-		goto err_aclk_pcie;
+		goto err_disable_0v9;
 
 	err = rockchip_pcie_init_port(rockchip);
 	if (err)
@@ -1335,7 +1496,7 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev)
 
 	err = rockchip_pcie_cfg_atu(rockchip);
 	if (err)
-		goto err_pcie_resume;
+		goto err_err_deinit_port;
 
 	/* Need this to enter L1 again */
 	rockchip_pcie_update_txcredit_mui(rockchip);
@@ -1343,15 +1504,13 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev)
 
 	return 0;
 
+err_err_deinit_port:
+	rockchip_pcie_deinit_phys(rockchip);
 err_pcie_resume:
-	clk_disable_unprepare(rockchip->aclk_pcie);
-err_aclk_pcie:
-	clk_disable_unprepare(rockchip->aclk_perf_pcie);
-err_aclk_perf_pcie:
-	clk_disable_unprepare(rockchip->hclk_pcie);
-err_hclk_pcie:
-	clk_disable_unprepare(rockchip->clk_pcie_pm);
-err_pcie_pm:
+	rockchip_pcie_disable_clocks(rockchip);
+err_disable_0v9:
+	if (!IS_ERR(rockchip->vpcie0v9))
+		regulator_disable(rockchip->vpcie0v9);
 	return err;
 }
 
@@ -1385,29 +1544,9 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	err = clk_prepare_enable(rockchip->aclk_pcie);
-	if (err) {
-		dev_err(dev, "unable to enable aclk_pcie clock\n");
-		goto err_aclk_pcie;
-	}
-
-	err = clk_prepare_enable(rockchip->aclk_perf_pcie);
-	if (err) {
-		dev_err(dev, "unable to enable aclk_perf_pcie clock\n");
-		goto err_aclk_perf_pcie;
-	}
-
-	err = clk_prepare_enable(rockchip->hclk_pcie);
-	if (err) {
-		dev_err(dev, "unable to enable hclk_pcie clock\n");
-		goto err_hclk_pcie;
-	}
-
-	err = clk_prepare_enable(rockchip->clk_pcie_pm);
-	if (err) {
-		dev_err(dev, "unable to enable hclk_pcie clock\n");
-		goto err_pcie_pm;
-	}
+	err = rockchip_pcie_enable_clocks(rockchip);
+	if (err)
+		return err;
 
 	err = rockchip_pcie_set_vpcie(rockchip);
 	if (err) {
@@ -1423,12 +1562,12 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 
 	err = rockchip_pcie_init_irq_domain(rockchip);
 	if (err < 0)
-		goto err_vpcie;
+		goto err_deinit_port;
 
 	err = of_pci_get_host_bridge_resources(dev->of_node, 0, 0xff,
 					       &res, &io_base);
 	if (err)
-		goto err_vpcie;
+		goto err_remove_irq_domain;
 
 	err = devm_request_pci_bus_resources(dev, &res);
 	if (err)
@@ -1466,12 +1605,12 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 
 	err = rockchip_pcie_cfg_atu(rockchip);
 	if (err)
-		goto err_free_res;
+		goto err_unmap_iospace;
 
 	rockchip->msg_region = devm_ioremap(dev, rockchip->msg_bus_addr, SZ_1M);
 	if (!rockchip->msg_region) {
 		err = -ENOMEM;
-		goto err_free_res;
+		goto err_unmap_iospace;
 	}
 
 	list_splice_init(&res, &bridge->windows);
@@ -1484,7 +1623,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 
 	err = pci_scan_root_bus_bridge(bridge);
 	if (err < 0)
-		goto err_free_res;
+		goto err_unmap_iospace;
 
 	bus = bridge->bus;
 
@@ -1498,9 +1637,17 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 	pci_bus_add_devices(bus);
 	return 0;
 
+err_unmap_iospace:
+	pci_unmap_iospace(rockchip->io);
 err_free_res:
 	pci_free_resource_list(&res);
+err_remove_irq_domain:
+	irq_domain_remove(rockchip->irq_domain);
+err_deinit_port:
+	rockchip_pcie_deinit_phys(rockchip);
 err_vpcie:
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
 	if (!IS_ERR(rockchip->vpcie3v3))
 		regulator_disable(rockchip->vpcie3v3);
 	if (!IS_ERR(rockchip->vpcie1v8))
@@ -1508,14 +1655,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 	if (!IS_ERR(rockchip->vpcie0v9))
 		regulator_disable(rockchip->vpcie0v9);
 err_set_vpcie:
-	clk_disable_unprepare(rockchip->clk_pcie_pm);
-err_pcie_pm:
-	clk_disable_unprepare(rockchip->hclk_pcie);
-err_hclk_pcie:
-	clk_disable_unprepare(rockchip->aclk_perf_pcie);
-err_aclk_perf_pcie:
-	clk_disable_unprepare(rockchip->aclk_pcie);
-err_aclk_pcie:
+	rockchip_pcie_disable_clocks(rockchip);
 	return err;
 }
 
@@ -1529,14 +1669,12 @@ static int rockchip_pcie_remove(struct platform_device *pdev)
 	pci_unmap_iospace(rockchip->io);
 	irq_domain_remove(rockchip->irq_domain);
 
-	phy_power_off(rockchip->phy);
-	phy_exit(rockchip->phy);
+	rockchip_pcie_deinit_phys(rockchip);
 
-	clk_disable_unprepare(rockchip->clk_pcie_pm);
-	clk_disable_unprepare(rockchip->hclk_pcie);
-	clk_disable_unprepare(rockchip->aclk_perf_pcie);
-	clk_disable_unprepare(rockchip->aclk_pcie);
+	rockchip_pcie_disable_clocks(rockchip);
 
+	if (!IS_ERR(rockchip->vpcie12v))
+		regulator_disable(rockchip->vpcie12v);
 	if (!IS_ERR(rockchip->vpcie3v3))
 		regulator_disable(rockchip->vpcie3v3);
 	if (!IS_ERR(rockchip->vpcie1v8))
diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c
index eec641a..65dea98 100644
--- a/drivers/pci/host/pcie-xilinx-nwl.c
+++ b/drivers/pci/host/pcie-xilinx-nwl.c
@@ -133,7 +133,6 @@
 #define CFG_DMA_REG_BAR			GENMASK(2, 0)
 
 #define INT_PCI_MSI_NR			(2 * 32)
-#define INTX_NUM			4
 
 /* Readin the PS_LINKUP */
 #define PS_LINKUP_OFFSET		0x00000238
@@ -334,9 +333,8 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc)
 
 	while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
 				MSGF_LEG_SR_MASKALL) != 0) {
-		for_each_set_bit(bit, &status, INTX_NUM) {
-			virq = irq_find_mapping(pcie->legacy_irq_domain,
-						bit + 1);
+		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
+			virq = irq_find_mapping(pcie->legacy_irq_domain, bit);
 			if (virq)
 				generic_handle_irq(virq);
 		}
@@ -436,6 +434,7 @@ static int nwl_legacy_map(struct irq_domain *domain, unsigned int irq,
 
 static const struct irq_domain_ops legacy_domain_ops = {
 	.map = nwl_legacy_map,
+	.xlate = pci_irqd_intx_xlate,
 };
 
 #ifdef CONFIG_PCI_MSI
@@ -559,7 +558,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
 	}
 
 	pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node,
-							INTX_NUM,
+							PCI_NUM_INTX,
 							&legacy_domain_ops,
 							pcie);
 
@@ -813,7 +812,7 @@ static int nwl_pcie_parse_dt(struct nwl_pcie *pcie,
 	pcie->irq_intx = platform_get_irq_byname(pdev, "intx");
 	if (pcie->irq_intx < 0) {
 		dev_err(dev, "failed to get intx IRQ %d\n", pcie->irq_intx);
-		return -EINVAL;
+		return pcie->irq_intx;
 	}
 
 	irq_set_chained_handler_and_data(pcie->irq_intx,
diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c
index f63fa5e..94e13cb 100644
--- a/drivers/pci/host/pcie-xilinx.c
+++ b/drivers/pci/host/pcie-xilinx.c
@@ -5,7 +5,7 @@
  *
  * Based on the Tegra PCIe driver
  *
- * Bits taken from Synopsys Designware Host controller driver and
+ * Bits taken from Synopsys DesignWare Host controller driver and
  * ARM PCI Host generic driver.
  *
  * This program is free software: you can redistribute it and/or modify
@@ -60,6 +60,7 @@
 #define XILINX_PCIE_INTR_MST_SLVERR	BIT(27)
 #define XILINX_PCIE_INTR_MST_ERRP	BIT(28)
 #define XILINX_PCIE_IMR_ALL_MASK	0x1FF30FED
+#define XILINX_PCIE_IMR_ENABLE_MASK	0x1FF30F0D
 #define XILINX_PCIE_IDR_ALL_MASK	0xFFFFFFFF
 
 /* Root Port Error FIFO Read Register definitions */
@@ -369,6 +370,7 @@ static int xilinx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
 /* INTx IRQ Domain operations */
 static const struct irq_domain_ops intx_domain_ops = {
 	.map = xilinx_pcie_intx_map,
+	.xlate = pci_irqd_intx_xlate,
 };
 
 /* PCIe HW Functions */
@@ -384,7 +386,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
 {
 	struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data;
 	struct device *dev = port->dev;
-	u32 val, mask, status, msi_data;
+	u32 val, mask, status;
 
 	/* Read interrupt decode and mask registers */
 	val = pcie_read(port, XILINX_PCIE_REG_IDR);
@@ -424,8 +426,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
 		xilinx_pcie_clear_err_interrupts(port);
 	}
 
-	if (status & XILINX_PCIE_INTR_INTX) {
-		/* INTx interrupt received */
+	if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) {
 		val = pcie_read(port, XILINX_PCIE_REG_RPIFR1);
 
 		/* Check whether interrupt valid */
@@ -434,41 +435,24 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
 			goto error;
 		}
 
-		if (!(val & XILINX_PCIE_RPIFR1_MSI_INTR)) {
-			/* Clear interrupt FIFO register 1 */
-			pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
-				   XILINX_PCIE_REG_RPIFR1);
-
-			/* Handle INTx Interrupt */
-			val = ((val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
-				XILINX_PCIE_RPIFR1_INTR_SHIFT) + 1;
-			generic_handle_irq(irq_find_mapping(port->leg_domain,
-							    val));
-		}
-	}
-
-	if (status & XILINX_PCIE_INTR_MSI) {
-		/* MSI Interrupt */
-		val = pcie_read(port, XILINX_PCIE_REG_RPIFR1);
-
-		if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) {
-			dev_warn(dev, "RP Intr FIFO1 read error\n");
-			goto error;
-		}
-
+		/* Decode the IRQ number */
 		if (val & XILINX_PCIE_RPIFR1_MSI_INTR) {
-			msi_data = pcie_read(port, XILINX_PCIE_REG_RPIFR2) &
-				   XILINX_PCIE_RPIFR2_MSG_DATA;
-
-			/* Clear interrupt FIFO register 1 */
-			pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
-				   XILINX_PCIE_REG_RPIFR1);
-
-			if (IS_ENABLED(CONFIG_PCI_MSI)) {
-				/* Handle MSI Interrupt */
-				generic_handle_irq(msi_data);
-			}
+			val = pcie_read(port, XILINX_PCIE_REG_RPIFR2) &
+				XILINX_PCIE_RPIFR2_MSG_DATA;
+		} else {
+			val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
+				XILINX_PCIE_RPIFR1_INTR_SHIFT;
+			val = irq_find_mapping(port->leg_domain, val);
 		}
+
+		/* Clear interrupt FIFO register 1 */
+		pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
+			   XILINX_PCIE_REG_RPIFR1);
+
+		/* Handle the interrupt */
+		if (IS_ENABLED(CONFIG_PCI_MSI) ||
+		    !(val & XILINX_PCIE_RPIFR1_MSI_INTR))
+			generic_handle_irq(val);
 	}
 
 	if (status & XILINX_PCIE_INTR_SLV_UNSUPP)
@@ -524,7 +508,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
 		return -ENODEV;
 	}
 
-	port->leg_domain = irq_domain_add_linear(pcie_intc_node, 4,
+	port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
 						 &intx_domain_ops,
 						 port);
 	if (!port->leg_domain) {
@@ -571,8 +555,8 @@ static void xilinx_pcie_init_port(struct xilinx_pcie_port *port)
 			 XILINX_PCIE_IMR_ALL_MASK,
 		   XILINX_PCIE_REG_IDR);
 
-	/* Enable all interrupts */
-	pcie_write(port, XILINX_PCIE_IMR_ALL_MASK, XILINX_PCIE_REG_IMR);
+	/* Enable all interrupts we handle */
+	pcie_write(port, XILINX_PCIE_IMR_ENABLE_MASK, XILINX_PCIE_REG_IMR);
 
 	/* Enable the Bridge enable bit */
 	pcie_write(port, pcie_read(port, XILINX_PCIE_REG_RPSC) |
diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c
index 6088c30..509893b 100644
--- a/drivers/pci/host/vmd.c
+++ b/drivers/pci/host/vmd.c
@@ -183,7 +183,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
 	int i, best = 1;
 	unsigned long flags;
 
-	if (!desc->msi_attrib.is_msix || vmd->msix_count == 1)
+	if (pci_is_bridge(msi_desc_to_pci_dev(desc)) || vmd->msix_count == 1)
 		return &vmd->irqs[0];
 
 	raw_spin_lock_irqsave(&list_lock, flags);
@@ -697,7 +697,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 
 	vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
-					PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
+					PCI_IRQ_MSIX);
 	if (vmd->msix_count < 0)
 		return vmd->msix_count;
 
@@ -755,6 +755,11 @@ static void vmd_remove(struct pci_dev *dev)
 static int vmd_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < vmd->msix_count; i++)
+                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
 
 	pci_save_state(pdev);
 	return 0;
@@ -763,6 +768,16 @@ static int vmd_suspend(struct device *dev)
 static int vmd_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct vmd_dev *vmd = pci_get_drvdata(pdev);
+	int err, i;
+
+	for (i = 0; i < vmd->msix_count; i++) {
+		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
+				       vmd_irq, IRQF_NO_THREAD,
+				       "vmd", &vmd->irqs[i]);
+		if (err)
+			return err;
+	}
 
 	pci_restore_state(pdev);
 	return 0;
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index 5f49c3f..2f8659a 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -280,7 +280,7 @@ static void zt5550_hc_remove_one(struct pci_dev *pdev)
 }
 
 
-static struct pci_device_id zt5550_hc_pci_tbl[] = {
+static const struct pci_device_id zt5550_hc_pci_tbl[] = {
 	{ PCI_VENDOR_ID_ZIATECH, PCI_DEVICE_ID_ZIATECH_5550_HC, PCI_ANY_ID, PCI_ANY_ID, },
 	{ 0, }
 };
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 33d300d..4d06b84 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -1417,7 +1417,7 @@ static void __exit unload_cpqphpd(void)
 		iounmap(smbios_start);
 }
 
-static struct pci_device_id hpcd_pci_tbl[] = {
+static const struct pci_device_id hpcd_pci_tbl[] = {
 	{
 	/* handle any PCI Hotplug controller */
 	.class =        ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00),
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 5efd01d..73cf846 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -852,7 +852,7 @@ static int set_bus(struct slot *slot_cur)
 	u8 speed;
 	u8 cmd = 0x0;
 	int retval;
-	static struct pci_device_id ciobx[] = {
+	static const struct pci_device_id ciobx[] = {
 		{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) },
 		{ },
 	};
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 43e345a..a6a4dac 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -1153,7 +1153,7 @@ void ibmphp_free_ebda_pci_rsrc_queue(void)
 	}
 }
 
-static struct pci_device_id id_table[] = {
+static const struct pci_device_id id_table[] = {
 	{
 		.vendor		= PCI_VENDOR_ID_IBM,
 		.device		= HPC_DEVICE_ID,
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 026830a..e5d5ce9 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -586,6 +586,14 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
 	events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
 			   PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
 			   PCI_EXP_SLTSTA_DLLSC);
+
+	/*
+	 * If we've already reported a power fault, don't report it again
+	 * until we've done something to handle it.
+	 */
+	if (ctrl->power_fault_detected)
+		events &= ~PCI_EXP_SLTSTA_PFD;
+
 	if (!events)
 		return IRQ_NONE;
 
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 7c20319..74f6a17 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -163,8 +163,8 @@ static void pnv_php_detach_device_nodes(struct device_node *parent)
 		of_node_put(dn);
 		refcount = kref_read(&dn->kobj.kref);
 		if (refcount != 1)
-			pr_warn("Invalid refcount %d on <%s>\n",
-				refcount, of_node_full_name(dn));
+			pr_warn("Invalid refcount %d on <%pOF>\n",
+				refcount, dn);
 
 		of_detach_node(dn);
 	}
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index 3f93a4e..a3449d7 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -150,8 +150,8 @@ static void dlpar_pci_add_bus(struct device_node *dn)
 	/* Add EADS device to PHB bus, adding new entry to bus->devices */
 	dev = of_create_pci_dev(dn, phb->bus, pdn->devfn);
 	if (!dev) {
-		printk(KERN_ERR "%s: failed to create pci dev for %s\n",
-				__func__, dn->full_name);
+		printk(KERN_ERR "%s: failed to create pci dev for %pOF\n",
+				__func__, dn);
 		return;
 	}
 
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
index a796301..edb5d8a 100644
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -102,7 +102,7 @@ static struct attribute *default_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dlpar_attr_group = {
+static const struct attribute_group dlpar_attr_group = {
 	.attrs = default_attrs,
 };
 
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 8d13202..1e29aba 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -318,7 +318,7 @@ int rpaphp_add_slot(struct device_node *dn)
 	if (!is_php_dn(dn, &indexes, &names, &types, &power_domains))
 		return 0;
 
-	dbg("Entry %s: dn->full_name=%s\n", __func__, dn->full_name);
+	dbg("Entry %s: dn=%pOF\n", __func__, dn);
 
 	/* register PCI devices */
 	name = (char *) &names[1];
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index ea41ea1..32aabc5 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -95,7 +95,7 @@ int rpaphp_enable_slot(struct slot *slot)
 
 	bus = pci_find_bus_by_node(slot->dn);
 	if (!bus) {
-		err("%s: no pci_bus for dn %s\n", __func__, slot->dn->full_name);
+		err("%s: no pci_bus for dn %pOF\n", __func__, slot->dn);
 		return -EINVAL;
 	}
 
@@ -125,7 +125,7 @@ int rpaphp_enable_slot(struct slot *slot)
 
 		if (rpaphp_debug) {
 			struct pci_dev *dev;
-			dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
+			dbg("%s: pci_devs of slot[%pOF]\n", __func__, slot->dn);
 			list_for_each_entry(dev, &bus->devices, bus_list)
 				dbg("\t%s\n", pci_name(dev));
 		}
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 388c4d8..4898623 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -122,8 +122,8 @@ int rpaphp_register_slot(struct slot *slot)
 	int retval;
 	int slotno = -1;
 
-	dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n",
-		__func__, slot->dn->full_name, slot->index, slot->name,
+	dbg("%s registering slot:path[%pOF] index[%x], name[%s] pdomain[%x] type[%d]\n",
+		__func__, slot->dn, slot->index, slot->name,
 		slot->power_domain, slot->type);
 
 	/* should not try to register the same slot twice */
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 3454dc7..7bfb87b 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -351,7 +351,7 @@ static void shpc_remove(struct pci_dev *dev)
 	kfree(ctrl);
 }
 
-static struct pci_device_id shpcd_pci_tbl[] = {
+static const struct pci_device_id shpcd_pci_tbl[] = {
 	{PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0)},
 	{ /* end: all zeroes */ }
 };
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index de0ea47..e5824c7 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -1062,6 +1062,8 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
 		if (rc) {
 			ctrl_info(ctrl, "Can't get msi for the hotplug controller\n");
 			ctrl_info(ctrl, "Use INTx for the hotplug controller\n");
+		} else {
+			pci_set_master(pdev);
 		}
 
 		rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED,
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 120485d..ac41c8b 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -331,7 +331,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 	while (i--)
 		pci_iov_remove_virtfn(dev, i, 0);
 
-	pcibios_sriov_disable(dev);
 err_pcibios:
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
 	pci_cfg_access_lock(dev);
@@ -339,6 +338,8 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 	ssleep(1);
 	pci_cfg_access_unlock(dev);
 
+	pcibios_sriov_disable(dev);
+
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
 
@@ -357,14 +358,14 @@ static void sriov_disable(struct pci_dev *dev)
 	for (i = 0; i < iov->num_VFs; i++)
 		pci_iov_remove_virtfn(dev, i, 0);
 
-	pcibios_sriov_disable(dev);
-
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
 	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	ssleep(1);
 	pci_cfg_access_unlock(dev);
 
+	pcibios_sriov_disable(dev);
+
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2225afc..496ed91 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1451,13 +1451,30 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 }
 EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);
 
+/*
+ * Users of the generic MSI infrastructure expect a device to have a single ID,
+ * so with DMA aliases we have to pick the least-worst compromise. Devices with
+ * DMA phantom functions tend to still emit MSIs from the real function number,
+ * so we ignore those and only consider topological aliases where either the
+ * alias device or RID appears on a different bus number. We also make the
+ * reasonable assumption that bridges are walked in an upstream direction (so
+ * the last one seen wins), and the much braver assumption that the most likely
+ * case is that of PCI->PCIe so we should always use the alias RID. This echoes
+ * the logic from intel_irq_remapping's set_msi_sid(), which presumably works
+ * well enough in practice; in the face of the horrible PCIe<->PCI-X conditions
+ * for taking ownership all we can really do is close our eyes and hope...
+ */
 static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data)
 {
 	u32 *pa = data;
+	u8 bus = PCI_BUS_NUM(*pa);
 
-	*pa = alias;
+	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus)
+		*pa = alias;
+
 	return 0;
 }
+
 /**
  * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID)
  * @domain:	The interrupt domain
@@ -1471,7 +1488,7 @@ static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data)
 u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
 {
 	struct device_node *of_node;
-	u32 rid = 0;
+	u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
 
 	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
 
@@ -1487,14 +1504,14 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
  * @pdev:	The PCI device
  *
  * Use the firmware data to find a device-specific MSI domain
- * (i.e. not one that is ste as a default).
+ * (i.e. not one that is set as a default).
  *
- * Returns: The coresponding MSI domain or NULL if none has been found.
+ * Returns: The corresponding MSI domain or NULL if none has been found.
  */
 struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
 {
 	struct irq_domain *dom;
-	u32 rid = 0;
+	u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
 
 	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
 	dom = of_msi_map_get_device_domain(&pdev->dev, rid);
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index a7a41d9..7e9e795 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -123,7 +123,7 @@ static struct attribute *smbios_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group smbios_attr_group = {
+static const struct attribute_group smbios_attr_group = {
 	.attrs = smbios_attributes,
 	.is_visible = smbios_instance_string_exist,
 };
@@ -260,7 +260,7 @@ static struct attribute *acpi_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group acpi_attr_group = {
+static const struct attribute_group acpi_attr_group = {
 	.attrs = acpi_attributes,
 	.is_visible = acpi_index_string_exist,
 };
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 2f3780b..1eecfa3 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -556,9 +556,9 @@ static ssize_t devspec_show(struct device *dev,
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct device_node *np = pci_device_to_OF_node(pdev);
 
-	if (np == NULL || np->full_name == NULL)
+	if (np == NULL)
 		return 0;
-	return sprintf(buf, "%s", np->full_name);
+	return sprintf(buf, "%pOF", np);
 }
 static DEVICE_ATTR_RO(devspec);
 #endif
@@ -1211,11 +1211,8 @@ static ssize_t pci_resource_io(struct file *filp, struct kobject *kobj,
 {
 	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
 	int bar = (unsigned long)attr->private;
-	struct resource *res;
 	unsigned long port = off;
 
-	res = &pdev->resource[bar];
-
 	port += pci_resource_start(pdev, bar);
 
 	if (port > pci_resource_end(pdev, bar))
@@ -1431,7 +1428,7 @@ static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj,
 	return count;
 }
 
-static struct bin_attribute pci_config_attr = {
+static const struct bin_attribute pci_config_attr = {
 	.attr =	{
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
@@ -1441,7 +1438,7 @@ static struct bin_attribute pci_config_attr = {
 	.write = pci_write_config,
 };
 
-static struct bin_attribute pcie_config_attr = {
+static const struct bin_attribute pcie_config_attr = {
 	.attr =	{
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
@@ -1735,7 +1732,7 @@ const struct attribute_group *pcie_dev_groups[] = {
 	NULL,
 };
 
-static struct attribute_group pci_dev_hp_attr_group = {
+static const struct attribute_group pci_dev_hp_attr_group = {
 	.attrs = pci_dev_hp_attrs,
 	.is_visible = pci_dev_hp_attrs_are_visible,
 };
@@ -1759,23 +1756,23 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-static struct attribute_group sriov_dev_attr_group = {
+static const struct attribute_group sriov_dev_attr_group = {
 	.attrs = sriov_dev_attrs,
 	.is_visible = sriov_attrs_are_visible,
 };
 #endif /* CONFIG_PCI_IOV */
 
-static struct attribute_group pci_dev_attr_group = {
+static const struct attribute_group pci_dev_attr_group = {
 	.attrs = pci_dev_dev_attrs,
 	.is_visible = pci_dev_attrs_are_visible,
 };
 
-static struct attribute_group pci_bridge_attr_group = {
+static const struct attribute_group pci_bridge_attr_group = {
 	.attrs = pci_bridge_attrs,
 	.is_visible = pci_bridge_attrs_are_visible,
 };
 
-static struct attribute_group pcie_dev_attr_group = {
+static const struct attribute_group pcie_dev_attr_group = {
 	.attrs = pcie_dev_attrs,
 	.is_visible = pcie_dev_attrs_are_visible,
 };
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 68e3b2b..b0002da 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -52,6 +52,7 @@ static void pci_pme_list_scan(struct work_struct *work);
 static LIST_HEAD(pci_pme_list);
 static DEFINE_MUTEX(pci_pme_list_mutex);
 static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan);
+static DEFINE_MUTEX(pci_bridge_mutex);
 
 struct pci_pme_device {
 	struct list_head list;
@@ -892,7 +893,9 @@ EXPORT_SYMBOL_GPL(__pci_complete_power_transition);
  * -EINVAL if the requested state is invalid.
  * -EIO if device does not support PCI PM or its PM capabilities register has a
  * wrong version, or device doesn't support the requested state.
+ * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
  * 0 if device already is in the requested state.
+ * 0 if the transition is to D3 but D3 is not supported.
  * 0 if device's power state has been successfully changed.
  */
 int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
@@ -1348,10 +1351,16 @@ static void pci_enable_bridge(struct pci_dev *dev)
 	if (bridge)
 		pci_enable_bridge(bridge);
 
+	/*
+	 * Hold pci_bridge_mutex to prevent a race when enabling two
+	 * devices below the bridge simultaneously.  The race may cause a
+	 * PCI_COMMAND_MEMORY update to be lost (see changelog).
+	 */
+	mutex_lock(&pci_bridge_mutex);
 	if (pci_is_enabled(dev)) {
 		if (!dev->is_busmaster)
 			pci_set_master(dev);
-		return;
+		goto end;
 	}
 
 	retval = pci_enable_device(dev);
@@ -1359,6 +1368,8 @@ static void pci_enable_bridge(struct pci_dev *dev)
 		dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n",
 			retval);
 	pci_set_master(dev);
+end:
+	mutex_unlock(&pci_bridge_mutex);
 }
 
 static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
@@ -1383,7 +1394,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
 		return 0;		/* already enabled */
 
 	bridge = pci_upstream_bridge(dev);
-	if (bridge)
+	if (bridge && !pci_is_enabled(bridge))
 		pci_enable_bridge(bridge);
 
 	/* only skip sriov related */
@@ -3818,27 +3829,49 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_wait_for_pending_transaction);
 
-/*
- * We should only need to wait 100ms after FLR, but some devices take longer.
- * Wait for up to 1000ms for config space to return something other than -1.
- * Intel IGD requires this when an LCD panel is attached.  We read the 2nd
- * dword because VFs don't implement the 1st dword.
- */
 static void pci_flr_wait(struct pci_dev *dev)
 {
-	int i = 0;
+	int delay = 1, timeout = 60000;
 	u32 id;
 
-	do {
-		msleep(100);
-		pci_read_config_dword(dev, PCI_COMMAND, &id);
-	} while (i++ < 10 && id == ~0);
+	/*
+	 * Per PCIe r3.1, sec 6.6.2, a device must complete an FLR within
+	 * 100ms, but may silently discard requests while the FLR is in
+	 * progress.  Wait 100ms before trying to access the device.
+	 */
+	msleep(100);
 
-	if (id == ~0)
-		dev_warn(&dev->dev, "Failed to return from FLR\n");
-	else if (i > 1)
-		dev_info(&dev->dev, "Required additional %dms to return from FLR\n",
-			 (i - 1) * 100);
+	/*
+	 * After 100ms, the device should not silently discard config
+	 * requests, but it may still indicate that it needs more time by
+	 * responding to them with CRS completions.  The Root Port will
+	 * generally synthesize ~0 data to complete the read (except when
+	 * CRS SV is enabled and the read was for the Vendor ID; in that
+	 * case it synthesizes 0x0001 data).
+	 *
+	 * Wait for the device to return a non-CRS completion.  Read the
+	 * Command register instead of Vendor ID so we don't have to
+	 * contend with the CRS SV value.
+	 */
+	pci_read_config_dword(dev, PCI_COMMAND, &id);
+	while (id == ~0) {
+		if (delay > timeout) {
+			dev_warn(&dev->dev, "not ready %dms after FLR; giving up\n",
+				 100 + delay - 1);
+			return;
+		}
+
+		if (delay > 1000)
+			dev_info(&dev->dev, "not ready %dms after FLR; waiting\n",
+				 100 + delay - 1);
+
+		msleep(delay);
+		delay *= 2;
+		pci_read_config_dword(dev, PCI_COMMAND, &id);
+	}
+
+	if (delay > 1000)
+		dev_info(&dev->dev, "ready %dms after FLR\n", 100 + delay - 1);
 }
 
 /**
@@ -5405,8 +5438,8 @@ static int of_pci_bus_find_domain_nr(struct device *parent)
 		use_dt_domains = 0;
 		domain = pci_get_new_domain_nr();
 	} else {
-		dev_err(parent, "Node %s has inconsistent \"linux,pci-domain\" property in DT\n",
-			parent->of_node->full_name);
+		dev_err(parent, "Node %pOF has inconsistent \"linux,pci-domain\" property in DT\n",
+			parent->of_node);
 		domain = -1;
 	}
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 22e0617..a6560c9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -235,6 +235,7 @@ enum pci_bar_type {
 	pci_bar_mem64,		/* A 64-bit memory BAR */
 };
 
+int pci_configure_extended_tags(struct pci_dev *dev, void *ign);
 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
 				int crs_timeout);
 int pci_setup_device(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index dea186a..6ff5f5b 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -32,16 +32,9 @@
 
 static int aer_probe(struct pcie_device *dev);
 static void aer_remove(struct pcie_device *dev);
-static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
-	enum pci_channel_state error);
 static void aer_error_resume(struct pci_dev *dev);
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
 
-static const struct pci_error_handlers aer_error_handlers = {
-	.error_detected = aer_error_detected,
-	.resume		= aer_error_resume,
-};
-
 static struct pcie_port_service_driver aerdriver = {
 	.name		= "aer",
 	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
@@ -49,9 +42,7 @@ static struct pcie_port_service_driver aerdriver = {
 
 	.probe		= aer_probe,
 	.remove		= aer_remove,
-
-	.err_handler	= &aer_error_handlers,
-
+	.error_resume	= aer_error_resume,
 	.reset_link	= aer_root_reset,
 };
 
@@ -350,20 +341,6 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 }
 
 /**
- * aer_error_detected - update severity status
- * @dev: pointer to Root Port's pci_dev data structure
- * @error: error severity being notified by port bus
- *
- * Invoked by Port Bus driver during error recovery.
- */
-static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
-			enum pci_channel_state error)
-{
-	/* Root Port has no impact. Always recovers. */
-	return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
-/**
  * aer_error_resume - clean up corresponding error status bits
  * @dev: pointer to Root Port's pci_dev data structure
  *
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index b1303b3..890efcc 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -5,10 +5,10 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * This file implements the core part of PCI-Express AER. When an pci-express
+ * This file implements the core part of PCIe AER. When a PCIe
  * error is delivered, an error message will be collected and printed to
  * console, then, an error recovery procedure will be executed by following
- * the pci error recovery rules.
+ * the PCI error recovery rules.
  *
  * Copyright (C) 2006 Intel Corp.
  *	Tom Long Nguyen (tom.l.nguyen@intel.com)
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index c39f32e..2d976a6 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -16,17 +16,62 @@
 #include <linux/pcieport_if.h>
 #include "../pci.h"
 
+struct rp_pio_header_log_regs {
+	u32 dw0;
+	u32 dw1;
+	u32 dw2;
+	u32 dw3;
+};
+
+struct dpc_rp_pio_regs {
+	u32 status;
+	u32 mask;
+	u32 severity;
+	u32 syserror;
+	u32 exception;
+
+	struct rp_pio_header_log_regs header_log;
+	u32 impspec_log;
+	u32 tlp_prefix_log[4];
+	u32 log_size;
+	u16 first_error;
+};
+
 struct dpc_dev {
 	struct pcie_device	*dev;
 	struct work_struct	work;
 	int			cap_pos;
 	bool			rp;
+	u32			rp_pio_status;
+};
+
+static const char * const rp_pio_error_string[] = {
+	"Configuration Request received UR Completion",	 /* Bit Position 0  */
+	"Configuration Request received CA Completion",	 /* Bit Position 1  */
+	"Configuration Request Completion Timeout",	 /* Bit Position 2  */
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"I/O Request received UR Completion",		 /* Bit Position 8  */
+	"I/O Request received CA Completion",		 /* Bit Position 9  */
+	"I/O Request Completion Timeout",		 /* Bit Position 10 */
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"Memory Request received UR Completion",	 /* Bit Position 16 */
+	"Memory Request received CA Completion",	 /* Bit Position 17 */
+	"Memory Request Completion Timeout",		 /* Bit Position 18 */
 };
 
 static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
 {
 	unsigned long timeout = jiffies + HZ;
 	struct pci_dev *pdev = dpc->dev->port;
+	struct device *dev = &dpc->dev->device;
 	u16 status;
 
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
@@ -36,15 +81,17 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
 		pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
 	}
 	if (status & PCI_EXP_DPC_RP_BUSY) {
-		dev_warn(&pdev->dev, "DPC root port still busy\n");
+		dev_warn(dev, "DPC root port still busy\n");
 		return -EBUSY;
 	}
 	return 0;
 }
 
-static void dpc_wait_link_inactive(struct pci_dev *pdev)
+static void dpc_wait_link_inactive(struct dpc_dev *dpc)
 {
 	unsigned long timeout = jiffies + HZ;
+	struct pci_dev *pdev = dpc->dev->port;
+	struct device *dev = &dpc->dev->device;
 	u16 lnk_status;
 
 	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
@@ -54,7 +101,7 @@ static void dpc_wait_link_inactive(struct pci_dev *pdev)
 		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
 	}
 	if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
-		dev_warn(&pdev->dev, "Link state not disabled for DPC event\n");
+		dev_warn(dev, "Link state not disabled for DPC event\n");
 }
 
 static void interrupt_event_handler(struct work_struct *work)
@@ -76,17 +123,132 @@ static void interrupt_event_handler(struct work_struct *work)
 	}
 	pci_unlock_rescan_remove();
 
-	dpc_wait_link_inactive(pdev);
+	dpc_wait_link_inactive(dpc);
 	if (dpc->rp && dpc_wait_rp_inactive(dpc))
 		return;
+	if (dpc->rp && dpc->rp_pio_status) {
+		pci_write_config_dword(pdev,
+				      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS,
+				      dpc->rp_pio_status);
+		dpc->rp_pio_status = 0;
+	}
+
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
 		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
 }
 
+static void dpc_rp_pio_print_tlp_header(struct device *dev,
+					struct rp_pio_header_log_regs *t)
+{
+	dev_err(dev, "TLP Header: %#010x %#010x %#010x %#010x\n",
+		t->dw0, t->dw1, t->dw2, t->dw3);
+}
+
+static void dpc_rp_pio_print_error(struct dpc_dev *dpc,
+				   struct dpc_rp_pio_regs *rp_pio)
+{
+	struct device *dev = &dpc->dev->device;
+	int i;
+	u32 status;
+
+	dev_err(dev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n",
+		rp_pio->status, rp_pio->mask);
+
+	dev_err(dev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n",
+		rp_pio->severity, rp_pio->syserror, rp_pio->exception);
+
+	status = (rp_pio->status & ~rp_pio->mask);
+
+	for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
+		if (!(status & (1 << i)))
+			continue;
+
+		dev_err(dev, "[%2d] %s%s\n", i, rp_pio_error_string[i],
+			rp_pio->first_error == i ? " (First)" : "");
+	}
+
+	dpc_rp_pio_print_tlp_header(dev, &rp_pio->header_log);
+	if (rp_pio->log_size == 4)
+		return;
+	dev_err(dev, "RP PIO ImpSpec Log %#010x\n", rp_pio->impspec_log);
+
+	for (i = 0; i < rp_pio->log_size - 5; i++)
+		dev_err(dev, "TLP Prefix Header: dw%d, %#010x\n", i,
+			rp_pio->tlp_prefix_log[i]);
+}
+
+static void dpc_rp_pio_get_info(struct dpc_dev *dpc,
+				struct dpc_rp_pio_regs *rp_pio)
+{
+	struct pci_dev *pdev = dpc->dev->port;
+	struct device *dev = &dpc->dev->device;
+	int i;
+	u16 cap;
+	u16 status;
+
+	pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS,
+			      &rp_pio->status);
+	pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_MASK,
+			      &rp_pio->mask);
+
+	pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_SEVERITY,
+			      &rp_pio->severity);
+	pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_SYSERROR,
+			      &rp_pio->syserror);
+	pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_EXCEPTION,
+			      &rp_pio->exception);
+
+	/* Get First Error Pointer */
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
+	rp_pio->first_error = (status & 0x1f00) >> 8;
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
+	rp_pio->log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
+	if (rp_pio->log_size < 4 || rp_pio->log_size > 9) {
+		dev_err(dev, "RP PIO log size %u is invalid\n",
+			rp_pio->log_size);
+		return;
+	}
+
+	pci_read_config_dword(pdev,
+			      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
+			      &rp_pio->header_log.dw0);
+	pci_read_config_dword(pdev,
+			      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4,
+			      &rp_pio->header_log.dw1);
+	pci_read_config_dword(pdev,
+			      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8,
+			      &rp_pio->header_log.dw2);
+	pci_read_config_dword(pdev,
+			      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12,
+			      &rp_pio->header_log.dw3);
+	if (rp_pio->log_size == 4)
+		return;
+
+	pci_read_config_dword(pdev,
+			      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG,
+			      &rp_pio->impspec_log);
+	for (i = 0; i < rp_pio->log_size - 5; i++)
+		pci_read_config_dword(pdev,
+			dpc->cap_pos + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG,
+			&rp_pio->tlp_prefix_log[i]);
+}
+
+static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
+{
+	struct dpc_rp_pio_regs rp_pio_regs;
+
+	dpc_rp_pio_get_info(dpc, &rp_pio_regs);
+	dpc_rp_pio_print_error(dpc, &rp_pio_regs);
+
+	dpc->rp_pio_status = rp_pio_regs.status;
+}
+
 static irqreturn_t dpc_irq(int irq, void *context)
 {
 	struct dpc_dev *dpc = (struct dpc_dev *)context;
 	struct pci_dev *pdev = dpc->dev->port;
+	struct device *dev = &dpc->dev->device;
 	u16 status, source;
 
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
@@ -95,20 +257,24 @@ static irqreturn_t dpc_irq(int irq, void *context)
 	if (!status || status == (u16)(~0))
 		return IRQ_NONE;
 
-	dev_info(&dpc->dev->device, "DPC containment event, status:%#06x source:%#06x\n",
+	dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n",
 		status, source);
 
 	if (status & PCI_EXP_DPC_STATUS_TRIGGER) {
 		u16 reason = (status >> 1) & 0x3;
 		u16 ext_reason = (status >> 5) & 0x3;
 
-		dev_warn(&dpc->dev->device, "DPC %s detected, remove downstream devices\n",
+		dev_warn(dev, "DPC %s detected, remove downstream devices\n",
 			 (reason == 0) ? "unmasked uncorrectable error" :
 			 (reason == 1) ? "ERR_NONFATAL" :
 			 (reason == 2) ? "ERR_FATAL" :
 			 (ext_reason == 0) ? "RP PIO error" :
 			 (ext_reason == 1) ? "software trigger" :
 					     "reserved error");
+		/* show RP PIO error detail information */
+		if (reason == 3 && ext_reason == 0)
+			dpc_process_rp_pio_error(dpc);
+
 		schedule_work(&dpc->work);
 	}
 	return IRQ_HANDLED;
@@ -119,10 +285,11 @@ static int dpc_probe(struct pcie_device *dev)
 {
 	struct dpc_dev *dpc;
 	struct pci_dev *pdev = dev->port;
+	struct device *device = &dev->device;
 	int status;
 	u16 ctl, cap;
 
-	dpc = devm_kzalloc(&dev->device, sizeof(*dpc), GFP_KERNEL);
+	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
 	if (!dpc)
 		return -ENOMEM;
 
@@ -131,10 +298,10 @@ static int dpc_probe(struct pcie_device *dev)
 	INIT_WORK(&dpc->work, interrupt_event_handler);
 	set_service_data(dev, dpc);
 
-	status = devm_request_irq(&dev->device, dev->irq, dpc_irq, IRQF_SHARED,
+	status = devm_request_irq(device, dev->irq, dpc_irq, IRQF_SHARED,
 				  "pcie-dpc", dpc);
 	if (status) {
-		dev_warn(&dev->device, "request IRQ%d failed: %d\n", dev->irq,
+		dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
 			 status);
 		return status;
 	}
@@ -147,7 +314,7 @@ static int dpc_probe(struct pcie_device *dev)
 	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN;
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
 
-	dev_info(&dev->device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
+	dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
 		cap & 0xf, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
 		FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
 		FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), (cap >> 8) & 0xf,
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 8aa3f14..be635f0 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -21,7 +21,6 @@
 
 #include "../pci.h"
 #include "portdrv.h"
-#include "aer/aerdrv.h"
 
 /* If this switch is set, PCIe port native services should not be enabled. */
 bool pcie_ports_disabled;
@@ -177,108 +176,20 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
 	pcie_port_device_remove(dev);
 }
 
-static int error_detected_iter(struct device *device, void *data)
-{
-	struct pcie_device *pcie_device;
-	struct pcie_port_service_driver *driver;
-	struct aer_broadcast_data *result_data;
-	pci_ers_result_t status;
-
-	result_data = (struct aer_broadcast_data *) data;
-
-	if (device->bus == &pcie_port_bus_type && device->driver) {
-		driver = to_service_driver(device->driver);
-		if (!driver ||
-			!driver->err_handler ||
-			!driver->err_handler->error_detected)
-			return 0;
-
-		pcie_device = to_pcie_device(device);
-
-		/* Forward error detected message to service drivers */
-		status = driver->err_handler->error_detected(
-			pcie_device->port,
-			result_data->state);
-		result_data->result =
-			merge_result(result_data->result, status);
-	}
-
-	return 0;
-}
-
 static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
 					enum pci_channel_state error)
 {
-	struct aer_broadcast_data data = {error, PCI_ERS_RESULT_CAN_RECOVER};
-
-	/* get true return value from &data */
-	device_for_each_child(&dev->dev, &data, error_detected_iter);
-	return data.result;
-}
-
-static int mmio_enabled_iter(struct device *device, void *data)
-{
-	struct pcie_device *pcie_device;
-	struct pcie_port_service_driver *driver;
-	pci_ers_result_t status, *result;
-
-	result = (pci_ers_result_t *) data;
-
-	if (device->bus == &pcie_port_bus_type && device->driver) {
-		driver = to_service_driver(device->driver);
-		if (driver &&
-			driver->err_handler &&
-			driver->err_handler->mmio_enabled) {
-			pcie_device = to_pcie_device(device);
-
-			/* Forward error message to service drivers */
-			status = driver->err_handler->mmio_enabled(
-					pcie_device->port);
-			*result = merge_result(*result, status);
-		}
-	}
-
-	return 0;
+	/* Root Port has no impact. Always recovers. */
+	return PCI_ERS_RESULT_CAN_RECOVER;
 }
 
 static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
 {
-	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
-
-	/* get true return value from &status */
-	device_for_each_child(&dev->dev, &status, mmio_enabled_iter);
-	return status;
-}
-
-static int slot_reset_iter(struct device *device, void *data)
-{
-	struct pcie_device *pcie_device;
-	struct pcie_port_service_driver *driver;
-	pci_ers_result_t status, *result;
-
-	result = (pci_ers_result_t *) data;
-
-	if (device->bus == &pcie_port_bus_type && device->driver) {
-		driver = to_service_driver(device->driver);
-		if (driver &&
-			driver->err_handler &&
-			driver->err_handler->slot_reset) {
-			pcie_device = to_pcie_device(device);
-
-			/* Forward error message to service drivers */
-			status = driver->err_handler->slot_reset(
-					pcie_device->port);
-			*result = merge_result(*result, status);
-		}
-	}
-
-	return 0;
+	return PCI_ERS_RESULT_RECOVERED;
 }
 
 static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
 {
-	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
-
 	/* If fatal, restore cfg space for possible link reset at upstream */
 	if (dev->error_state == pci_channel_io_frozen) {
 		dev->state_saved = true;
@@ -287,9 +198,7 @@ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
 		pci_enable_pcie_error_reporting(dev);
 	}
 
-	/* get true return value from &status */
-	device_for_each_child(&dev->dev, &status, slot_reset_iter);
-	return status;
+	return PCI_ERS_RESULT_RECOVERED;
 }
 
 static int resume_iter(struct device *device, void *data)
@@ -299,13 +208,11 @@ static int resume_iter(struct device *device, void *data)
 
 	if (device->bus == &pcie_port_bus_type && device->driver) {
 		driver = to_service_driver(device->driver);
-		if (driver &&
-			driver->err_handler &&
-			driver->err_handler->resume) {
+		if (driver && driver->error_resume) {
 			pcie_device = to_pcie_device(device);
 
 			/* Forward error message to service drivers */
-			driver->err_handler->resume(pcie_device->port);
+			driver->error_resume(pcie_device->port);
 		}
 	}
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e6a917b..ff94b69 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1745,21 +1745,50 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 	 */
 }
 
-static void pci_configure_extended_tags(struct pci_dev *dev)
+int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
 {
-	u32 dev_cap;
+	struct pci_host_bridge *host;
+	u32 cap;
+	u16 ctl;
 	int ret;
 
 	if (!pci_is_pcie(dev))
-		return;
+		return 0;
 
-	ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &dev_cap);
+	ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
 	if (ret)
-		return;
+		return 0;
 
-	if (dev_cap & PCI_EXP_DEVCAP_EXT_TAG)
+	if (!(cap & PCI_EXP_DEVCAP_EXT_TAG))
+		return 0;
+
+	ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
+	if (ret)
+		return 0;
+
+	host = pci_find_host_bridge(dev->bus);
+	if (!host)
+		return 0;
+
+	/*
+	 * If some device in the hierarchy doesn't handle Extended Tags
+	 * correctly, make sure they're disabled.
+	 */
+	if (host->no_ext_tags) {
+		if (ctl & PCI_EXP_DEVCTL_EXT_TAG) {
+			dev_info(&dev->dev, "disabling Extended Tags\n");
+			pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+						   PCI_EXP_DEVCTL_EXT_TAG);
+		}
+		return 0;
+	}
+
+	if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) {
+		dev_info(&dev->dev, "enabling Extended Tags\n");
 		pcie_capability_set_word(dev, PCI_EXP_DEVCTL,
 					 PCI_EXP_DEVCTL_EXT_TAG);
+	}
+	return 0;
 }
 
 /**
@@ -1810,7 +1839,7 @@ static void pci_configure_device(struct pci_dev *dev)
 	int ret;
 
 	pci_configure_mps(dev);
-	pci_configure_extended_tags(dev);
+	pci_configure_extended_tags(dev, NULL);
 	pci_configure_relaxed_ordering(dev);
 
 	memset(&hpp, 0, sizeof(hpp));
@@ -1867,11 +1896,58 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pci_alloc_dev);
 
-bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
-				int crs_timeout)
+static bool pci_bus_crs_vendor_id(u32 l)
+{
+	return (l & 0xffff) == 0x0001;
+}
+
+static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
+			     int timeout)
 {
 	int delay = 1;
 
+	if (!pci_bus_crs_vendor_id(*l))
+		return true;	/* not a CRS completion */
+
+	if (!timeout)
+		return false;	/* CRS, but caller doesn't want to wait */
+
+	/*
+	 * We got the reserved Vendor ID that indicates a completion with
+	 * Configuration Request Retry Status (CRS).  Retry until we get a
+	 * valid Vendor ID or we time out.
+	 */
+	while (pci_bus_crs_vendor_id(*l)) {
+		if (delay > timeout) {
+			pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n",
+				pci_domain_nr(bus), bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+			return false;
+		}
+		if (delay >= 1000)
+			pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n",
+				pci_domain_nr(bus), bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+		msleep(delay);
+		delay *= 2;
+
+		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
+			return false;
+	}
+
+	if (delay >= 1000)
+		pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n",
+			pci_domain_nr(bus), bus->number,
+			PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
+
+	return true;
+}
+
+bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
+				int timeout)
+{
 	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
 		return false;
 
@@ -1880,28 +1956,8 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
 	    *l == 0x0000ffff || *l == 0xffff0000)
 		return false;
 
-	/*
-	 * Configuration Request Retry Status.  Some root ports return the
-	 * actual device ID instead of the synthetic ID (0xFFFF) required
-	 * by the PCIe spec.  Ignore the device ID and only check for
-	 * (vendor id == 1).
-	 */
-	while ((*l & 0xffff) == 0x0001) {
-		if (!crs_timeout)
-			return false;
-
-		msleep(delay);
-		delay *= 2;
-		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
-			return false;
-		/* Card hasn't responded in 60 seconds?  Must be stuck. */
-		if (delay > crs_timeout) {
-			printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n",
-			       pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
-			       PCI_FUNC(devfn));
-			return false;
-		}
-	}
+	if (pci_bus_crs_vendor_id(*l))
+		return pci_bus_wait_crs(bus, devfn, l, timeout);
 
 	return true;
 }
@@ -2331,6 +2387,15 @@ void pcie_bus_configure_settings(struct pci_bus *bus)
 }
 EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
 
+/*
+ * Called after each bus is probed, but before its children are examined.  This
+ * is marked as __weak because multiple architectures define it.
+ */
+void __weak pcibios_fixup_bus(struct pci_bus *bus)
+{
+       /* nothing to do, expected to be removed in the future */
+}
+
 unsigned int pci_scan_child_bus(struct pci_bus *bus)
 {
 	unsigned int devfn, pass, max = bus->busn_res.start;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a346487..a2afb44 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2062,7 +2062,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
 
 /*
  * The 82575 and 82598 may experience data corruption issues when transitioning
- * out of L0S.  To prevent this we need to disable L0S on the pci-e link
+ * out of L0S.  To prevent this we need to disable L0S on the PCIe link.
  */
 static void quirk_disable_aspm_l0s(struct pci_dev *dev)
 {
@@ -4227,6 +4227,18 @@ static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
 	return acs_flags ? 0 : 1;
 }
 
+static int pci_quirk_xgene_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	/*
+	 * X-Gene root matching this quirk do not allow peer-to-peer
+	 * transactions with others, allowing masking out these bits as if they
+	 * were unimplemented in the ACS capability.
+	 */
+	acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+
+	return acs_flags ? 0 : 1;
+}
+
 /*
  * Many Intel PCH root ports do provide ACS-like features to disable peer
  * transactions and validate bus numbers in requests, but do not provide an
@@ -4475,6 +4487,8 @@ static const struct pci_dev_acs_enabled {
 	{ 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */
 	/* Cavium ThunderX */
 	{ PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs },
+	/* APM X-Gene */
+	{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
 	{ 0 }
 };
 
@@ -4747,23 +4761,6 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap);
 
-/*
- * VMD-enabled root ports will change the source ID for all messages
- * to the VMD device. Rather than doing device matching with the source
- * ID, the AER driver should traverse the child device tree, reading
- * AER registers to find the faulting device.
- */
-static void quirk_no_aersid(struct pci_dev *pdev)
-{
-	/* VMD Domain */
-	if (pdev->bus->sysdata && pci_domain_nr(pdev->bus) >= 0x10000)
-		pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
-}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
-
 /* FLR may cause some 82579 devices to hang. */
 static void quirk_intel_no_flr(struct pci_dev *dev)
 {
@@ -4771,3 +4768,34 @@ static void quirk_intel_no_flr(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_intel_no_flr);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_intel_no_flr);
+
+static void quirk_no_ext_tags(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
+
+	if (!bridge)
+		return;
+
+	bridge->no_ext_tags = 1;
+	dev_info(&pdev->dev, "disabling Extended Tags (this device can't handle them)\n");
+
+	pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0142, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
+
+#ifdef CONFIG_PCI_ATS
+/*
+ * Some devices have a broken ATS implementation causing IOMMU stalls.
+ * Don't use ATS for those devices.
+ */
+static void quirk_no_ats(struct pci_dev *pdev)
+{
+	dev_info(&pdev->dev, "disabling ATS (broken on this device)\n");
+	pdev->ats_cap = 0;
+}
+
+/* AMD Stoney platform GPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_no_ats);
+#endif /* CONFIG_PCI_ATS */
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index 81eda3d..86106c4 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -17,12 +17,6 @@
 #include <linux/cache.h>
 #include "pci.h"
 
-void __weak pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	dev_dbg(&dev->dev, "assigning IRQ %02d\n", irq);
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 void pci_assign_irq(struct pci_dev *dev)
 {
 	u8 pin;
@@ -65,29 +59,5 @@ void pci_assign_irq(struct pci_dev *dev)
 
 	/* Always tell the device, so the driver knows what is
 	   the real IRQ to use; the device does not use it. */
-	pcibios_update_irq(dev, irq);
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
-
-void pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
-		    int (*map_irq)(const struct pci_dev *, u8, u8))
-{
-	/*
-	 * Implement pci_fixup_irqs() through pci_assign_irq().
-	 * This code should be remove eventually, it is a wrapper
-	 * around pci_assign_irq() interface to keep current
-	 * pci_fixup_irqs() behaviour unchanged on architecture
-	 * code still relying on its interface.
-	 */
-	struct pci_dev *dev = NULL;
-	struct pci_host_bridge *hbrg = NULL;
-
-	for_each_pci_dev(dev) {
-		hbrg = pci_find_host_bridge(dev->bus);
-		hbrg->swizzle_irq = swizzle;
-		hbrg->map_irq = map_irq;
-		pci_assign_irq(dev);
-		hbrg->swizzle_irq = NULL;
-		hbrg->map_irq = NULL;
-	}
-}
-EXPORT_SYMBOL_GPL(pci_fixup_irqs);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 85774b7..e576e1a 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -234,6 +234,19 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
 	return 0;
 }
 
+/*
+ * We don't have to worry about legacy ISA devices, so nothing to do here.
+ * This is marked as __weak because multiple architectures define it; it should
+ * eventually go away.
+ */
+resource_size_t __weak pcibios_align_resource(void *data,
+					      const struct resource *res,
+					      resource_size_t size,
+					      resource_size_t align)
+{
+       return res->start;
+}
+
 static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 		int resno, resource_size_t size, resource_size_t align)
 {
diff --git a/drivers/phy/rockchip/phy-rockchip-pcie.c b/drivers/phy/rockchip/phy-rockchip-pcie.c
index 6904633..7cbdde0 100644
--- a/drivers/phy/rockchip/phy-rockchip-pcie.c
+++ b/drivers/phy/rockchip/phy-rockchip-pcie.c
@@ -73,10 +73,38 @@ struct rockchip_pcie_data {
 struct rockchip_pcie_phy {
 	struct rockchip_pcie_data *phy_data;
 	struct regmap *reg_base;
+	struct phy_pcie_instance {
+		struct phy *phy;
+		u32 index;
+	} phys[PHY_MAX_LANE_NUM];
+	struct mutex pcie_mutex;
 	struct reset_control *phy_rst;
 	struct clk *clk_pciephy_ref;
+	int pwr_cnt;
+	int init_cnt;
 };
 
+static struct rockchip_pcie_phy *to_pcie_phy(struct phy_pcie_instance *inst)
+{
+	return container_of(inst, struct rockchip_pcie_phy,
+					phys[inst->index]);
+}
+
+static struct phy *rockchip_pcie_phy_of_xlate(struct device *dev,
+					      struct of_phandle_args *args)
+{
+	struct rockchip_pcie_phy *rk_phy = dev_get_drvdata(dev);
+
+	if (args->args_count == 0)
+		return rk_phy->phys[0].phy;
+
+	if (WARN_ON(args->args[0] >= PHY_MAX_LANE_NUM))
+		return ERR_PTR(-ENODEV);
+
+	return rk_phy->phys[args->args[0]].phy;
+}
+
+
 static inline void phy_wr_cfg(struct rockchip_pcie_phy *rk_phy,
 			      u32 addr, u32 data)
 {
@@ -116,29 +144,59 @@ static inline u32 phy_rd_cfg(struct rockchip_pcie_phy *rk_phy,
 
 static int rockchip_pcie_phy_power_off(struct phy *phy)
 {
-	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	struct phy_pcie_instance *inst = phy_get_drvdata(phy);
+	struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst);
 	int err = 0;
 
+	mutex_lock(&rk_phy->pcie_mutex);
+
+	regmap_write(rk_phy->reg_base,
+		     rk_phy->phy_data->pcie_laneoff,
+		     HIWORD_UPDATE(PHY_LANE_IDLE_OFF,
+				   PHY_LANE_IDLE_MASK,
+				   PHY_LANE_IDLE_A_SHIFT + inst->index));
+
+	if (--rk_phy->pwr_cnt)
+		goto err_out;
+
 	err = reset_control_assert(rk_phy->phy_rst);
 	if (err) {
 		dev_err(&phy->dev, "assert phy_rst err %d\n", err);
-		return err;
+		goto err_restore;
 	}
 
+err_out:
+	mutex_unlock(&rk_phy->pcie_mutex);
 	return 0;
+
+err_restore:
+	rk_phy->pwr_cnt++;
+	regmap_write(rk_phy->reg_base,
+		     rk_phy->phy_data->pcie_laneoff,
+		     HIWORD_UPDATE(!PHY_LANE_IDLE_OFF,
+				   PHY_LANE_IDLE_MASK,
+				   PHY_LANE_IDLE_A_SHIFT + inst->index));
+	mutex_unlock(&rk_phy->pcie_mutex);
+	return err;
 }
 
 static int rockchip_pcie_phy_power_on(struct phy *phy)
 {
-	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	struct phy_pcie_instance *inst = phy_get_drvdata(phy);
+	struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst);
 	int err = 0;
 	u32 status;
 	unsigned long timeout;
 
+	mutex_lock(&rk_phy->pcie_mutex);
+
+	if (rk_phy->pwr_cnt++)
+		goto err_out;
+
 	err = reset_control_deassert(rk_phy->phy_rst);
 	if (err) {
 		dev_err(&phy->dev, "deassert phy_rst err %d\n", err);
-		return err;
+		goto err_pwr_cnt;
 	}
 
 	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
@@ -146,6 +204,12 @@ static int rockchip_pcie_phy_power_on(struct phy *phy)
 				   PHY_CFG_ADDR_MASK,
 				   PHY_CFG_ADDR_SHIFT));
 
+	regmap_write(rk_phy->reg_base,
+		     rk_phy->phy_data->pcie_laneoff,
+		     HIWORD_UPDATE(!PHY_LANE_IDLE_OFF,
+				   PHY_LANE_IDLE_MASK,
+				   PHY_LANE_IDLE_A_SHIFT + inst->index));
+
 	/*
 	 * No documented timeout value for phy operation below,
 	 * so we make it large enough here. And we use loop-break
@@ -214,18 +278,29 @@ static int rockchip_pcie_phy_power_on(struct phy *phy)
 		goto err_pll_lock;
 	}
 
+err_out:
+	mutex_unlock(&rk_phy->pcie_mutex);
 	return 0;
 
 err_pll_lock:
 	reset_control_assert(rk_phy->phy_rst);
+err_pwr_cnt:
+	rk_phy->pwr_cnt--;
+	mutex_unlock(&rk_phy->pcie_mutex);
 	return err;
 }
 
 static int rockchip_pcie_phy_init(struct phy *phy)
 {
-	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	struct phy_pcie_instance *inst = phy_get_drvdata(phy);
+	struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst);
 	int err = 0;
 
+	mutex_lock(&rk_phy->pcie_mutex);
+
+	if (rk_phy->init_cnt++)
+		goto err_out;
+
 	err = clk_prepare_enable(rk_phy->clk_pciephy_ref);
 	if (err) {
 		dev_err(&phy->dev, "Fail to enable pcie ref clock.\n");
@@ -238,20 +313,33 @@ static int rockchip_pcie_phy_init(struct phy *phy)
 		goto err_reset;
 	}
 
-	return err;
+err_out:
+	mutex_unlock(&rk_phy->pcie_mutex);
+	return 0;
 
 err_reset:
+
 	clk_disable_unprepare(rk_phy->clk_pciephy_ref);
 err_refclk:
+	rk_phy->init_cnt--;
+	mutex_unlock(&rk_phy->pcie_mutex);
 	return err;
 }
 
 static int rockchip_pcie_phy_exit(struct phy *phy)
 {
-	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	struct phy_pcie_instance *inst = phy_get_drvdata(phy);
+	struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst);
+
+	mutex_lock(&rk_phy->pcie_mutex);
+
+	if (--rk_phy->init_cnt)
+		goto err_init_cnt;
 
 	clk_disable_unprepare(rk_phy->clk_pciephy_ref);
 
+err_init_cnt:
+	mutex_unlock(&rk_phy->pcie_mutex);
 	return 0;
 }
 
@@ -283,10 +371,11 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct rockchip_pcie_phy *rk_phy;
-	struct phy *generic_phy;
 	struct phy_provider *phy_provider;
 	struct regmap *grf;
 	const struct of_device_id *of_id;
+	int i;
+	u32 phy_num;
 
 	grf = syscon_node_to_regmap(dev->parent->of_node);
 	if (IS_ERR(grf)) {
@@ -305,6 +394,8 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev)
 	rk_phy->phy_data = (struct rockchip_pcie_data *)of_id->data;
 	rk_phy->reg_base = grf;
 
+	mutex_init(&rk_phy->pcie_mutex);
+
 	rk_phy->phy_rst = devm_reset_control_get(dev, "phy");
 	if (IS_ERR(rk_phy->phy_rst)) {
 		if (PTR_ERR(rk_phy->phy_rst) != -EPROBE_DEFER)
@@ -319,14 +410,26 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev)
 		return PTR_ERR(rk_phy->clk_pciephy_ref);
 	}
 
-	generic_phy = devm_phy_create(dev, dev->of_node, &ops);
-	if (IS_ERR(generic_phy)) {
-		dev_err(dev, "failed to create PHY\n");
-		return PTR_ERR(generic_phy);
+	/* parse #phy-cells to see if it's legacy PHY model */
+	if (of_property_read_u32(dev->of_node, "#phy-cells", &phy_num))
+		return -ENOENT;
+
+	phy_num = (phy_num == 0) ? 1 : PHY_MAX_LANE_NUM;
+	dev_dbg(dev, "phy number is %d\n", phy_num);
+
+	for (i = 0; i < phy_num; i++) {
+		rk_phy->phys[i].phy = devm_phy_create(dev, dev->of_node, &ops);
+		if (IS_ERR(rk_phy->phys[i].phy)) {
+			dev_err(dev, "failed to create PHY%d\n", i);
+			return PTR_ERR(rk_phy->phys[i].phy);
+		}
+		rk_phy->phys[i].index = i;
+		phy_set_drvdata(rk_phy->phys[i].phy, &rk_phy->phys[i]);
 	}
 
-	phy_set_drvdata(generic_phy, rk_phy);
-	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	platform_set_drvdata(pdev, rk_phy);
+	phy_provider = devm_of_phy_provider_register(dev,
+					rockchip_pcie_phy_of_xlate);
 
 	return PTR_ERR_OR_ZERO(phy_provider);
 }
diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 0831b42..4eb8e1a 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c
@@ -255,12 +255,13 @@ static int parse_rgb(const char *buf, struct platform_zone *zone)
 
 static struct platform_zone *match_zone(struct device_attribute *attr)
 {
-	int i;
-	for (i = 0; i < quirks->num_zones; i++) {
-		if ((struct device_attribute *)zone_data[i].attr == attr) {
+	u8 zone;
+
+	for (zone = 0; zone < quirks->num_zones; zone++) {
+		if ((struct device_attribute *)zone_data[zone].attr == attr) {
 			pr_debug("alienware-wmi: matched zone location: %d\n",
-				 zone_data[i].location);
-			return &zone_data[i];
+				 zone_data[zone].location);
+			return &zone_data[zone];
 		}
 	}
 	return NULL;
@@ -420,7 +421,7 @@ static DEVICE_ATTR(lighting_control_state, 0644, show_control_state,
 
 static int alienware_zone_init(struct platform_device *dev)
 {
-	int i;
+	u8 zone;
 	char buffer[10];
 	char *name;
 
@@ -457,19 +458,19 @@ static int alienware_zone_init(struct platform_device *dev)
 	if (!zone_data)
 		return -ENOMEM;
 
-	for (i = 0; i < quirks->num_zones; i++) {
-		sprintf(buffer, "zone%02X", i);
+	for (zone = 0; zone < quirks->num_zones; zone++) {
+		sprintf(buffer, "zone%02hhX", zone);
 		name = kstrdup(buffer, GFP_KERNEL);
 		if (name == NULL)
 			return 1;
-		sysfs_attr_init(&zone_dev_attrs[i].attr);
-		zone_dev_attrs[i].attr.name = name;
-		zone_dev_attrs[i].attr.mode = 0644;
-		zone_dev_attrs[i].show = zone_show;
-		zone_dev_attrs[i].store = zone_set;
-		zone_data[i].location = i;
-		zone_attrs[i] = &zone_dev_attrs[i].attr;
-		zone_data[i].attr = &zone_dev_attrs[i];
+		sysfs_attr_init(&zone_dev_attrs[zone].attr);
+		zone_dev_attrs[zone].attr.name = name;
+		zone_dev_attrs[zone].attr.mode = 0644;
+		zone_dev_attrs[zone].show = zone_show;
+		zone_dev_attrs[zone].store = zone_set;
+		zone_data[zone].location = zone;
+		zone_attrs[zone] = &zone_dev_attrs[zone].attr;
+		zone_data[zone].attr = &zone_dev_attrs[zone];
 	}
 	zone_attrs[quirks->num_zones] = &dev_attr_lighting_control_state.attr;
 	zone_attribute_group.attrs = zone_attrs;
@@ -481,12 +482,13 @@ static int alienware_zone_init(struct platform_device *dev)
 
 static void alienware_zone_exit(struct platform_device *dev)
 {
+	u8 zone;
+
 	sysfs_remove_group(&dev->dev.kobj, &zone_attribute_group);
 	led_classdev_unregister(&global_led);
 	if (zone_dev_attrs) {
-		int i;
-		for (i = 0; i < quirks->num_zones; i++)
-			kfree(zone_dev_attrs[i].attr.name);
+		for (zone = 0; zone < quirks->num_zones; zone++)
+			kfree(zone_dev_attrs[zone].attr.name);
 	}
 	kfree(zone_dev_attrs);
 	kfree(zone_data);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 709e3a6..48e1541 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -299,7 +299,7 @@ static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
 	union acpi_object *obj;
 	u32 tmp = 0;
 
-	status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, 1, method_id,
+	status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, 0, method_id,
 				     &input, &output);
 
 	if (ACPI_FAILURE(status))
@@ -1946,7 +1946,7 @@ static int show_call(struct seq_file *m, void *data)
 	acpi_status status;
 
 	status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID,
-				     1, asus->debug.method_id,
+				     0, asus->debug.method_id,
 				     &input, &output);
 
 	if (ACPI_FAILURE(status))
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index dad8f4a..28d9f86 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -48,7 +48,6 @@ MODULE_LICENSE("GPL");
 #define DELL_EVENT_GUID "9DBB5994-A997-11DA-B012-B622A1EF5492"
 #define DELL_DESCRIPTOR_GUID "8D9DDCBC-A997-11DA-B012-B622A1EF5492"
 
-static u32 dell_wmi_interface_version;
 static bool wmi_requires_smbios_request;
 
 MODULE_ALIAS("wmi:"DELL_EVENT_GUID);
@@ -56,6 +55,7 @@ MODULE_ALIAS("wmi:"DELL_DESCRIPTOR_GUID);
 
 struct dell_wmi_priv {
 	struct input_dev *input_dev;
+	u32 interface_version;
 };
 
 static int __init dmi_matched(const struct dmi_system_id *dmi)
@@ -348,6 +348,7 @@ static void dell_wmi_process_key(struct wmi_device *wdev, int type, int code)
 static void dell_wmi_notify(struct wmi_device *wdev,
 			    union acpi_object *obj)
 {
+	struct dell_wmi_priv *priv = dev_get_drvdata(&wdev->dev);
 	u16 *buffer_entry, *buffer_end;
 	acpi_size buffer_size;
 	int len, i;
@@ -376,7 +377,7 @@ static void dell_wmi_notify(struct wmi_device *wdev,
 	 * So to prevent reading garbage from buffer we will process only first
 	 * one event on devices with WMI interface version 0.
 	 */
-	if (dell_wmi_interface_version == 0 && buffer_entry < buffer_end)
+	if (priv->interface_version == 0 && buffer_entry < buffer_end)
 		if (buffer_end > buffer_entry + buffer_entry[0] + 1)
 			buffer_end = buffer_entry + buffer_entry[0] + 1;
 
@@ -626,61 +627,67 @@ static void dell_wmi_input_destroy(struct wmi_device *wdev)
  * WMI Interface Version     8       4    <version>
  * WMI buffer length        12       4    4096
  */
-static int dell_wmi_check_descriptor_buffer(void)
+static int dell_wmi_check_descriptor_buffer(struct wmi_device *wdev)
 {
-	struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	acpi_status status;
+	struct dell_wmi_priv *priv = dev_get_drvdata(&wdev->dev);
+	union acpi_object *obj = NULL;
+	struct wmi_device *desc_dev;
 	u32 *buffer;
+	int ret;
 
-	status = wmi_query_block(DELL_DESCRIPTOR_GUID, 0, &out);
-	if (ACPI_FAILURE(status)) {
-		pr_err("Cannot read Dell descriptor buffer - %d\n", status);
-		return status;
+	desc_dev = wmidev_get_other_guid(wdev, DELL_DESCRIPTOR_GUID);
+	if (!desc_dev) {
+		dev_err(&wdev->dev, "Dell WMI descriptor does not exist\n");
+		return -ENODEV;
 	}
 
-	obj = (union acpi_object *)out.pointer;
+	obj = wmidev_block_query(desc_dev, 0);
 	if (!obj) {
-		pr_err("Dell descriptor buffer is empty\n");
-		return -EINVAL;
+		dev_err(&wdev->dev, "failed to read Dell WMI descriptor\n");
+		ret = -EIO;
+		goto out;
 	}
 
 	if (obj->type != ACPI_TYPE_BUFFER) {
-		pr_err("Cannot read Dell descriptor buffer\n");
-		kfree(obj);
-		return -EINVAL;
+		dev_err(&wdev->dev, "Dell descriptor has wrong type\n");
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (obj->buffer.length != 128) {
-		pr_err("Dell descriptor buffer has invalid length (%d)\n",
+		dev_err(&wdev->dev,
+			"Dell descriptor buffer has invalid length (%d)\n",
 			obj->buffer.length);
 		if (obj->buffer.length < 16) {
-			kfree(obj);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 	}
 
 	buffer = (u32 *)obj->buffer.pointer;
 
 	if (buffer[0] != 0x4C4C4544 && buffer[1] != 0x494D5720)
-		pr_warn("Dell descriptor buffer has invalid signature (%*ph)\n",
+		dev_warn(&wdev->dev, "Dell descriptor buffer has invalid signature (%*ph)\n",
 			8, buffer);
 
 	if (buffer[2] != 0 && buffer[2] != 1)
-		pr_warn("Dell descriptor buffer has unknown version (%d)\n",
+		dev_warn(&wdev->dev, "Dell descriptor buffer has unknown version (%d)\n",
 			buffer[2]);
 
 	if (buffer[3] != 4096)
-		pr_warn("Dell descriptor buffer has invalid buffer length (%d)\n",
+		dev_warn(&wdev->dev, "Dell descriptor buffer has invalid buffer length (%d)\n",
 			buffer[3]);
 
-	dell_wmi_interface_version = buffer[2];
+	priv->interface_version = buffer[2];
+	ret = 0;
 
-	pr_info("Detected Dell WMI interface version %u\n",
-		dell_wmi_interface_version);
+	dev_info(&wdev->dev, "Detected Dell WMI interface version %u\n",
+		priv->interface_version);
 
+out:
 	kfree(obj);
-	return 0;
+	put_device(&desc_dev->dev);
+	return ret;
 }
 
 /*
@@ -717,17 +724,19 @@ static int dell_wmi_events_set_enabled(bool enable)
 
 static int dell_wmi_probe(struct wmi_device *wdev)
 {
+	struct dell_wmi_priv *priv;
 	int err;
 
-	struct dell_wmi_priv *priv = devm_kzalloc(
+	priv = devm_kzalloc(
 		&wdev->dev, sizeof(struct dell_wmi_priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	dev_set_drvdata(&wdev->dev, priv);
 
-	err = dell_wmi_check_descriptor_buffer();
+	err = dell_wmi_check_descriptor_buffer(wdev);
 	if (err)
 		return err;
 
-	dev_set_drvdata(&wdev->dev, priv);
-
 	return dell_wmi_input_setup(wdev);
 }
 
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 0df4209..b4ed3dc 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -107,13 +107,6 @@ enum hp_wmi_hardware_mask {
 	HPWMI_TABLET_MASK	= 0x04,
 };
 
-#define BIOS_ARGS_INIT(write, ctype, size)				\
-	(struct bios_args)	{	.signature = 0x55434553,	\
-					.command = (write) ? 0x2 : 0x1,	\
-					.commandtype = (ctype),		\
-					.datasize = (size),		\
-					.data = 0 }
-
 struct bios_return {
 	u32 sigpass;
 	u32 return_code;
@@ -188,6 +181,22 @@ struct rfkill2_device {
 static int rfkill2_count;
 static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES];
 
+/* map output size to the corresponding WMI method id */
+static inline int encode_outsize_for_pvsz(int outsize)
+{
+	if (outsize > 4096)
+		return -EINVAL;
+	if (outsize > 1024)
+		return 5;
+	if (outsize > 128)
+		return 4;
+	if (outsize > 4)
+		return 3;
+	if (outsize > 0)
+		return 2;
+	return 1;
+}
+
 /*
  * hp_wmi_perform_query
  *
@@ -211,6 +220,7 @@ static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES];
 static int hp_wmi_perform_query(int query, enum hp_wmi_command command,
 				void *buffer, int insize, int outsize)
 {
+	int mid;
 	struct bios_return *bios_return;
 	int actual_outsize;
 	union acpi_object *obj;
@@ -225,11 +235,15 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command,
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 	int ret = 0;
 
+	mid = encode_outsize_for_pvsz(outsize);
+	if (WARN_ON(mid < 0))
+		return mid;
+
 	if (WARN_ON(insize > sizeof(args.data)))
 		return -EINVAL;
 	memcpy(&args.data, buffer, insize);
 
-	wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
+	wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output);
 
 	obj = output.pointer;
 
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index c62e5e1..610ac83 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -103,7 +103,7 @@ static void rtl_port_unmap(void __iomem *addr)
 static int ibm_rtl_write(u8 value)
 {
 	int ret = 0, count = 0;
-	static u32 cmd_port_val;
+	u32 cmd_port_val;
 
 	RTL_DEBUG("%s(%d)\n", __func__, value);
 
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 603fc60..fe98d4a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -42,6 +42,8 @@
 
 #define IDEAPAD_RFKILL_DEV_NUM	(3)
 
+#define BM_CONSERVATION_BIT (5)
+
 #define CFG_BT_BIT	(16)
 #define CFG_3G_BIT	(17)
 #define CFG_WIFI_BIT	(18)
@@ -55,6 +57,11 @@ static const char *const ideapad_wmi_fnesc_events[] = {
 #endif
 
 enum {
+	BMCMD_CONSERVATION_ON = 3,
+	BMCMD_CONSERVATION_OFF = 5,
+};
+
+enum {
 	VPCCMD_R_VPC1 = 0x10,
 	VPCCMD_R_BL_MAX,
 	VPCCMD_R_BL,
@@ -123,6 +130,23 @@ static int read_method_int(acpi_handle handle, const char *method, int *val)
 	}
 }
 
+static int method_gbmd(acpi_handle handle, unsigned long *ret)
+{
+	int result, val;
+
+	result = read_method_int(handle, "GBMD", &val);
+	*ret = val;
+	return result;
+}
+
+static int method_sbmc(acpi_handle handle, int cmd)
+{
+	acpi_status status;
+
+	status = acpi_execute_simple_method(handle, "SBMC", cmd);
+	return ACPI_FAILURE(status) ? -1 : 0;
+}
+
 static int method_vpcr(acpi_handle handle, int cmd, int *ret)
 {
 	acpi_status status;
@@ -250,6 +274,13 @@ static int debugfs_status_show(struct seq_file *s, void *data)
 	if (!read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &value))
 		seq_printf(s, "Camera status:\t%s(%lu)\n",
 			   value ? "On" : "Off", value);
+	seq_puts(s, "=====================\n");
+
+	if (!method_gbmd(priv->adev->handle, &value)) {
+		seq_printf(s, "Conservation mode:\t%s(%lu)\n",
+			   test_bit(BM_CONSERVATION_BIT, &value) ? "On" : "Off",
+			   value);
+	}
 
 	return 0;
 }
@@ -456,10 +487,45 @@ static ssize_t __maybe_unused touchpad_store(struct device *dev,
 
 static DEVICE_ATTR_RO(touchpad);
 
+static ssize_t conservation_mode_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ideapad_private *priv = dev_get_drvdata(dev);
+	unsigned long result;
+
+	if (method_gbmd(priv->adev->handle, &result))
+		return sprintf(buf, "-1\n");
+	return sprintf(buf, "%u\n", test_bit(BM_CONSERVATION_BIT, &result));
+}
+
+static ssize_t conservation_mode_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct ideapad_private *priv = dev_get_drvdata(dev);
+	bool state;
+	int ret;
+
+	ret = kstrtobool(buf, &state);
+	if (ret)
+		return ret;
+
+	ret = method_sbmc(priv->adev->handle, state ?
+					      BMCMD_CONSERVATION_ON :
+					      BMCMD_CONSERVATION_OFF);
+	if (ret < 0)
+		return -EIO;
+	return count;
+}
+
+static DEVICE_ATTR_RW(conservation_mode);
+
 static struct attribute *ideapad_attributes[] = {
 	&dev_attr_camera_power.attr,
 	&dev_attr_fan_mode.attr,
 	&dev_attr_touchpad.attr,
+	&dev_attr_conservation_mode.attr,
 	NULL
 };
 
@@ -477,6 +543,9 @@ static umode_t ideapad_is_visible(struct kobject *kobj,
 		unsigned long value;
 		supported = !read_ec_data(priv->adev->handle, VPCCMD_R_FAN,
 					  &value);
+	} else if (attr == &dev_attr_conservation_mode.attr) {
+		supported = acpi_has_method(priv->adev->handle, "GBMD") &&
+			    acpi_has_method(priv->adev->handle, "SBMC");
 	} else
 		supported = true;
 
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index a782c78..e34fd70 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -230,7 +230,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 	if (event != 0xc0) {
 		if (!priv->array ||
 		    !sparse_keymap_report_event(priv->array, event, 1, true))
-			dev_info(&device->dev, "unknown event 0x%x\n", event);
+			dev_dbg(&device->dev, "unknown event 0x%x\n", event);
 		return;
 	}
 
@@ -241,7 +241,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 	}
 
 	if (!sparse_keymap_report_event(priv->input_dev, ev_index, 1, true))
-		dev_info(&device->dev, "unknown event index 0x%llx\n",
+		dev_dbg(&device->dev, "unknown event index 0x%llx\n",
 			 ev_index);
 }
 
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index 4809267..58c5ff3 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -83,7 +83,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 	} else if (sparse_keymap_report_event(priv->input_dev, event, 1, true)) {
 		return;
 	}
-	dev_info(&device->dev, "unknown event index 0x%x\n", event);
+	dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
 }
 
 static int intel_vbtn_probe(struct platform_device *device)
diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
index 871cfa6..d79fbf9 100644
--- a/drivers/platform/x86/intel_mid_powerbtn.c
+++ b/drivers/platform/x86/intel_mid_powerbtn.c
@@ -108,13 +108,13 @@ static irqreturn_t mid_pb_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static struct mid_pb_ddata mfld_ddata = {
+static const struct mid_pb_ddata mfld_ddata = {
 	.mirqlvl1_addr	= INTEL_MSIC_IRQLVL1MSK,
 	.pbstat_addr	= INTEL_MSIC_PBSTATUS,
 	.pbstat_mask	= MSIC_PB_LEVEL,
 };
 
-static struct mid_pb_ddata mrfld_ddata = {
+static const struct mid_pb_ddata mrfld_ddata = {
 	.mirqlvl1_addr	= BCOVE_IRQLVL1MSK,
 	.pbstat_addr	= BCOVE_PBSTATUS,
 	.pbstat_mask	= BCOVE_PB_LEVEL,
@@ -142,8 +142,10 @@ static int mid_pb_probe(struct platform_device *pdev)
 	if (!id)
 		return -ENODEV;
 
-	if (irq < 0)
-		return -EINVAL;
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq);
+		return irq;
+	}
 
 	input = devm_input_allocate_device(&pdev->dev);
 	if (!input)
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c
index 914bcd2..17e08b4 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -110,6 +110,13 @@ static const struct pmc_reg_map spt_reg_map = {
 	.pfear_sts = spt_pfear_map,
 	.mphy_sts = spt_mphy_map,
 	.pll_sts = spt_pll_map,
+	.slp_s0_offset = SPT_PMC_SLP_S0_RES_COUNTER_OFFSET,
+	.ltr_ignore_offset = SPT_PMC_LTR_IGNORE_OFFSET,
+	.regmap_length = SPT_PMC_MMIO_REG_LEN,
+	.ppfear0_offset = SPT_PMC_XRAM_PPFEAR0A,
+	.ppfear_buckets = SPT_PPFEAR_NUM_ENTRIES,
+	.pm_cfg_offset = SPT_PMC_PM_CFG_OFFSET,
+	.pm_read_disable_bit = SPT_PMC_READ_DISABLE_BIT,
 };
 
 static const struct pci_device_id pmc_pci_ids[] = {
@@ -157,12 +164,13 @@ static inline u32 pmc_core_adjust_slp_s0_step(u32 value)
 int intel_pmc_slp_s0_counter_read(u32 *data)
 {
 	struct pmc_dev *pmcdev = &pmc;
+	const struct pmc_reg_map *map = pmcdev->map;
 	u32 value;
 
 	if (!pmcdev->has_slp_s0_res)
 		return -EACCES;
 
-	value = pmc_core_reg_read(pmcdev, SPT_PMC_SLP_S0_RES_COUNTER_OFFSET);
+	value = pmc_core_reg_read(pmcdev, map->slp_s0_offset);
 	*data = pmc_core_adjust_slp_s0_step(value);
 
 	return 0;
@@ -172,9 +180,10 @@ EXPORT_SYMBOL_GPL(intel_pmc_slp_s0_counter_read);
 static int pmc_core_dev_state_get(void *data, u64 *val)
 {
 	struct pmc_dev *pmcdev = data;
+	const struct pmc_reg_map *map = pmcdev->map;
 	u32 value;
 
-	value = pmc_core_reg_read(pmcdev, SPT_PMC_SLP_S0_RES_COUNTER_OFFSET);
+	value = pmc_core_reg_read(pmcdev, map->slp_s0_offset);
 	*val = pmc_core_adjust_slp_s0_step(value);
 
 	return 0;
@@ -187,8 +196,8 @@ static int pmc_core_check_read_lock_bit(void)
 	struct pmc_dev *pmcdev = &pmc;
 	u32 value;
 
-	value = pmc_core_reg_read(pmcdev, SPT_PMC_PM_CFG_OFFSET);
-	return value & BIT(SPT_PMC_READ_DISABLE_BIT);
+	value = pmc_core_reg_read(pmcdev, pmcdev->map->pm_cfg_offset);
+	return value & BIT(pmcdev->map->pm_read_disable_bit);
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -204,12 +213,13 @@ static int pmc_core_ppfear_sts_show(struct seq_file *s, void *unused)
 {
 	struct pmc_dev *pmcdev = s->private;
 	const struct pmc_bit_map *map = pmcdev->map->pfear_sts;
-	u8 pf_regs[NUM_ENTRIES];
+	u8 pf_regs[PPFEAR_MAX_NUM_ENTRIES];
 	int index, iter;
 
-	iter = SPT_PMC_XRAM_PPFEAR0A;
+	iter = pmcdev->map->ppfear0_offset;
 
-	for (index = 0; index < NUM_ENTRIES; index++, iter++)
+	for (index = 0; index < pmcdev->map->ppfear_buckets &&
+	     index < PPFEAR_MAX_NUM_ENTRIES; index++, iter++)
 		pf_regs[index] = pmc_core_reg_read_byte(pmcdev, iter);
 
 	for (index = 0; map[index].name; index++)
@@ -376,6 +386,7 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, const char __user
 *userbuf, size_t count, loff_t *ppos)
 {
 	struct pmc_dev *pmcdev = &pmc;
+	const struct pmc_reg_map *map = pmcdev->map;
 	u32 val, buf_size, fd;
 	int err = 0;
 
@@ -392,9 +403,9 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, const char __user
 		goto out_unlock;
 	}
 
-	fd = pmc_core_reg_read(pmcdev, SPT_PMC_LTR_IGNORE_OFFSET);
+	fd = pmc_core_reg_read(pmcdev, map->ltr_ignore_offset);
 	fd |= (1U << val);
-	pmc_core_reg_write(pmcdev, SPT_PMC_LTR_IGNORE_OFFSET, fd);
+	pmc_core_reg_write(pmcdev, map->ltr_ignore_offset, fd);
 
 out_unlock:
 	mutex_unlock(&pmcdev->lock);
@@ -530,8 +541,8 @@ static int pmc_core_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	}
 
 	mutex_init(&pmcdev->lock);
-	pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit();
 	pmcdev->map = map;
+	pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit();
 
 	err = pmc_core_dbgfs_register(pmcdev);
 	if (err < 0)
diff --git a/drivers/platform/x86/intel_pmc_core.h b/drivers/platform/x86/intel_pmc_core.h
index 5a48e77..3d225a9 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel_pmc_core.h
@@ -38,7 +38,8 @@
 #define SPT_PMC_SLP_S0_RES_COUNTER_STEP		0x64
 #define PMC_BASE_ADDR_MASK			~(SPT_PMC_MMIO_REG_LEN - 1)
 #define MTPMC_MASK				0xffff0000
-#define NUM_ENTRIES				5
+#define PPFEAR_MAX_NUM_ENTRIES			5
+#define SPT_PPFEAR_NUM_ENTRIES			5
 #define SPT_PMC_READ_DISABLE_BIT		0x16
 #define SPT_PMC_MSG_FULL_STS_BIT		0x18
 #define NUM_RETRIES				100
@@ -126,10 +127,37 @@ struct pmc_bit_map {
 	u32 bit_mask;
 };
 
+/**
+ * struct pmc_reg_map - Structure used to define parameter unique to a
+			PCH family
+ * @pfear_sts:		Maps name of IP block to PPFEAR* bit
+ * @mphy_sts:		Maps name of MPHY lane to MPHY status lane status bit
+ * @pll_sts:		Maps name of PLL to corresponding bit status
+ * @slp_s0_offset:	PWRMBASE offset to read SLP_S0 residency
+ * @ltr_ignore_offset:	PWRMBASE offset to read/write LTR ignore bit
+ * @base_address:	Base address of PWRMBASE defined in BIOS writer guide
+ * @regmap_length:	Length of memory to map from PWRMBASE address to access
+ * @ppfear0_offset:	PWRMBASE offset to to read PPFEAR*
+ * @ppfear_buckets:	Number of 8 bits blocks to read all IP blocks from
+ *			PPFEAR
+ * @pm_cfg_offset:	PWRMBASE offset to PM_CFG register
+ * @pm_read_disable_bit: Bit index to read PMC_READ_DISABLE
+ *
+ * Each PCH has unique set of register offsets and bit indexes. This structure
+ * captures them to have a common implementation.
+ */
 struct pmc_reg_map {
 	const struct pmc_bit_map *pfear_sts;
 	const struct pmc_bit_map *mphy_sts;
 	const struct pmc_bit_map *pll_sts;
+	const u32 slp_s0_offset;
+	const u32 ltr_ignore_offset;
+	const u32 base_address;
+	const int regmap_length;
+	const u32 ppfear0_offset;
+	const int ppfear_buckets;
+	const u32 pm_cfg_offset;
+	const int pm_read_disable_bit;
 };
 
 /**
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index f7cf981..2c85f75 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -72,20 +72,20 @@ struct intel_scu_ipc_pdata_t {
 	u8 irq_mode;
 };
 
-static struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = {
+static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = {
 	.i2c_base = 0xff12b000,
 	.i2c_len = 0x10,
 	.irq_mode = 0,
 };
 
 /* Penwell and Cloverview */
-static struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = {
+static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = {
 	.i2c_base = 0xff12b000,
 	.i2c_len = 0x10,
 	.irq_mode = 1,
 };
 
-static struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = {
+static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = {
 	.i2c_base  = 0xff00d000,
 	.i2c_len = 0x10,
 	.irq_mode = 0,
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index cd21df9..d4fc42b 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -331,6 +331,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = {
 
 static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = {
 	TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf),
+	TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf),
 	{}
 };
 
diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
index 6ebdbd2..e0424d5 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
@@ -46,7 +46,6 @@
 #define TELEM_SAMPLING_DEFAULT_PERIOD	0xD
 
 #define TELEM_MAX_EVENTS_SRAM		28
-#define TELEM_MAX_OS_ALLOCATED_EVENTS	20
 #define TELEM_SSRAM_STARTTIME_OFFSET	8
 #define TELEM_SSRAM_EVTLOG_OFFSET	16
 
@@ -153,6 +152,30 @@ static struct telemetry_evtmap
 	{"PC2_AND_MEM_SHALLOW_IDLE_RES",	0x1D40},
 };
 
+static struct telemetry_evtmap
+	telemetry_glk_pss_default_events[TELEM_MAX_OS_ALLOCATED_EVENTS] = {
+	{"IA_CORE0_C6_RES",			0x0400},
+	{"IA_CORE0_C6_CTR",			0x0000},
+	{"IA_MODULE0_C7_RES",			0x0410},
+	{"IA_MODULE0_C7_CTR",			0x000C},
+	{"IA_C0_RES",				0x0805},
+	{"PCS_LTR",				0x2801},
+	{"PSTATES",				0x2802},
+	{"SOC_S0I3_RES",			0x0407},
+	{"SOC_S0I3_CTR",			0x0008},
+	{"PCS_S0I3_CTR",			0x0007},
+	{"PCS_C1E_RES",				0x0414},
+	{"PCS_IDLE_STATUS",			0x2806},
+	{"IA_PERF_LIMITS",			0x280B},
+	{"GT_PERF_LIMITS",			0x280C},
+	{"PCS_WAKEUP_S0IX_CTR",			0x0025},
+	{"PCS_IDLE_BLOCKED",			0x2C00},
+	{"PCS_S0IX_BLOCKED",			0x2C01},
+	{"PCS_S0IX_WAKE_REASONS",		0x2C02},
+	{"PCS_LTR_BLOCKING",			0x2C03},
+	{"PC2_AND_MEM_SHALLOW_IDLE_RES",	0x1D40},
+};
+
 /* APL specific Data */
 static struct telemetry_plt_config telem_apl_config = {
 	.pss_config = {
@@ -163,8 +186,19 @@ static struct telemetry_plt_config telem_apl_config = {
 	},
 };
 
+/* GLK specific Data */
+static struct telemetry_plt_config telem_glk_config = {
+	.pss_config = {
+		.telem_evts = telemetry_glk_pss_default_events,
+	},
+	.ioss_config = {
+		.telem_evts = telemetry_apl_ioss_default_events,
+	},
+};
+
 static const struct x86_cpu_id telemetry_cpu_ids[] = {
 	TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config),
+	TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config),
 	{}
 };
 
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index f6209b7..6201382 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -184,7 +184,7 @@ static const struct backlight_ops msi_backlight_ops = {
 static void msi_wmi_notify(u32 value, void *context)
 {
 	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
-	static struct key_entry *key;
+	struct key_entry *key;
 	union acpi_object *obj;
 	acpi_status status;
 
diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c
index f4bad83..35d8b9a 100644
--- a/drivers/platform/x86/mxm-wmi.c
+++ b/drivers/platform/x86/mxm-wmi.c
@@ -53,7 +53,7 @@ int mxm_wmi_call_mxds(int adapter)
 
 	printk("calling mux switch %d\n", adapter);
 
-	status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input,
+	status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input,
 				     &output);
 
 	if (ACPI_FAILURE(status))
@@ -78,7 +78,7 @@ int mxm_wmi_call_mxmx(int adapter)
 
 	printk("calling mux switch %d\n", adapter);
 
-	status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input,
+	status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input,
 				     &output);
 
 	if (ACPI_FAILURE(status))
diff --git a/drivers/platform/x86/peaq-wmi.c b/drivers/platform/x86/peaq-wmi.c
index 77d1f90..bc98ef9 100644
--- a/drivers/platform/x86/peaq-wmi.c
+++ b/drivers/platform/x86/peaq-wmi.c
@@ -39,7 +39,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev)
 	struct acpi_buffer input = { sizeof(dummy), &dummy };
 	struct acpi_buffer output = { sizeof(obj), &obj };
 
-	status = wmi_evaluate_method(PEAQ_DOLBY_BUTTON_GUID, 1,
+	status = wmi_evaluate_method(PEAQ_DOLBY_BUTTON_GUID, 0,
 				     PEAQ_DOLBY_BUTTON_METHOD_ID,
 				     &input, &output);
 	if (ACPI_FAILURE(status))
@@ -51,7 +51,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev)
 		return;
 	}
 
-	if (peaq_ignore_events_counter && --peaq_ignore_events_counter >= 0)
+	if (peaq_ignore_events_counter && peaq_ignore_events_counter--)
 		return;
 
 	if (obj.integer.value) {
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index b225731..2242d60 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -24,7 +24,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #define TPACPI_VERSION "0.25"
-#define TPACPI_SYSFS_VERSION 0x020700
+#define TPACPI_SYSFS_VERSION 0x030000
 
 /*
  *  Changelog:
@@ -6342,7 +6342,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm)
 
 	switch (thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_hwmon->kobj,
 				&thermal_temp_input16_group);
 		if (res)
 			return res;
@@ -6350,7 +6350,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm)
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_hwmon->kobj,
 				&thermal_temp_input8_group);
 		if (res)
 			return res;
@@ -6367,13 +6367,13 @@ static void thermal_exit(void)
 {
 	switch (thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_hwmon->kobj,
 				   &thermal_temp_input16_group);
 		break;
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_hwmon->kobj,
 				   &thermal_temp_input8_group);
 		break;
 	case TPACPI_THERMAL_NONE:
@@ -8696,7 +8696,7 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 			fan_attributes[ARRAY_SIZE(fan_attributes)-2] =
 					&dev_attr_fan2_input.attr;
 		}
-		rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
+		rc = sysfs_create_group(&tpacpi_hwmon->kobj,
 					 &fan_attr_group);
 		if (rc < 0)
 			return rc;
@@ -8704,7 +8704,7 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 		rc = driver_create_file(&tpacpi_hwmon_pdriver.driver,
 					&driver_attr_fan_watchdog);
 		if (rc < 0) {
-			sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
+			sysfs_remove_group(&tpacpi_hwmon->kobj,
 					&fan_attr_group);
 			return rc;
 		}
@@ -8719,7 +8719,7 @@ static void fan_exit(void)
 		    "cancelling any pending fan watchdog tasks\n");
 
 	/* FIXME: can we really do this unconditionally? */
-	sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group);
+	sysfs_remove_group(&tpacpi_hwmon->kobj, &fan_attr_group);
 	driver_remove_file(&tpacpi_hwmon_pdriver.driver,
 			   &driver_attr_fan_watchdog);
 
@@ -9149,16 +9149,6 @@ static void hotkey_driver_event(const unsigned int scancode)
 	tpacpi_driver_event(TP_HKEY_EV_HOTKEY_BASE + scancode);
 }
 
-/* sysfs name ---------------------------------------------------------- */
-static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
-}
-
-static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
-
 /* --------------------------------------------------------------------- */
 
 /* /proc support */
@@ -9696,8 +9686,6 @@ static void thinkpad_acpi_module_exit(void)
 	if (tpacpi_hwmon)
 		hwmon_device_unregister(tpacpi_hwmon);
 
-	if (tp_features.sensors_pdev_attrs_registered)
-		device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
 	if (tpacpi_sensors_pdev)
 		platform_device_unregister(tpacpi_sensors_pdev);
 	if (tpacpi_pdev)
@@ -9818,14 +9806,10 @@ static int __init thinkpad_acpi_module_init(void)
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
-	ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
-	if (ret) {
-		pr_err("unable to create sysfs hwmon device attributes\n");
-		thinkpad_acpi_module_exit();
-		return ret;
-	}
 	tp_features.sensors_pdev_attrs_registered = 1;
-	tpacpi_hwmon = hwmon_device_register(&tpacpi_sensors_pdev->dev);
+	tpacpi_hwmon = hwmon_device_register_with_groups(
+		&tpacpi_sensors_pdev->dev, TPACPI_NAME, NULL, NULL);
+
 	if (IS_ERR(tpacpi_hwmon)) {
 		ret = PTR_ERR(tpacpi_hwmon);
 		tpacpi_hwmon = NULL;
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index e32ba57..0765b17 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -218,7 +218,7 @@ u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out)
 	if (!(block->flags & ACPI_WMI_METHOD))
 		return AE_BAD_DATA;
 
-	if (block->instance_count < instance)
+	if (block->instance_count <= instance)
 		return AE_BAD_PARAMETER;
 
 	input.count = 2;
@@ -265,7 +265,7 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
 	block = &wblock->gblock;
 	handle = wblock->acpi_device->handle;
 
-	if (block->instance_count < instance)
+	if (block->instance_count <= instance)
 		return AE_BAD_PARAMETER;
 
 	/* Check GUID is a data block */
@@ -392,7 +392,7 @@ acpi_status wmi_set_block(const char *guid_string, u8 instance,
 	block = &wblock->gblock;
 	handle = wblock->acpi_device->handle;
 
-	if (block->instance_count < instance)
+	if (block->instance_count <= instance)
 		return AE_BAD_PARAMETER;
 
 	/* Check GUID is a data block */
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 04602cb..43799bd 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -39,7 +39,7 @@ struct aer_capability_regs {
 };
 
 #if defined(CONFIG_PCIEAER)
-/* pci-e port driver needs this function to enable aer */
+/* PCIe port driver needs this function to enable AER */
 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
@@ -67,7 +67,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
 		    struct aer_capability_regs *aer);
 int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
-		       int severity,
-		       struct aer_capability_regs *aer_regs);
+		       int severity, struct aer_capability_regs *aer_regs);
 #endif //_AER_H_
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6383115..2e02885 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -307,7 +307,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer);
 static inline void stack_tracer_disable(void)
 {
 	/* Preemption or interupts must be disabled */
-	if (IS_ENABLED(CONFIG_PREEMPT_DEBUG))
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT))
 		WARN_ON_ONCE(!preempt_count() || !irqs_disabled());
 	this_cpu_inc(disable_stack_tracer);
 }
@@ -320,7 +320,7 @@ static inline void stack_tracer_disable(void)
  */
 static inline void stack_tracer_enable(void)
 {
-	if (IS_ENABLED(CONFIG_PREEMPT_DEBUG))
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT))
 		WARN_ON_ONCE(!preempt_count() || !irqs_disabled());
 	this_cpu_dec(disable_stack_tracer);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 21a6fd6..6882538 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
-void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
@@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
@@ -985,6 +986,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
+						gpa_t gpa)
+{
+	return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
+}
+
 static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
 {
 	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index af5edbf..f7a04e1 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -62,11 +62,13 @@ struct pci_epc_ops {
  * @size: the size of the PCI address space
  * @bitmap: bitmap to manage the PCI address space
  * @pages: number of bits representing the address region
+ * @page_size: size of each page
  */
 struct pci_epc_mem {
 	phys_addr_t	phys_base;
 	size_t		size;
 	unsigned long	*bitmap;
+	size_t		page_size;
 	int		pages;
 };
 
@@ -98,6 +100,9 @@ struct pci_epc {
 #define devm_pci_epc_create(dev, ops)    \
 		__devm_pci_epc_create((dev), (ops), THIS_MODULE)
 
+#define pci_epc_mem_init(epc, phys_addr, size)	\
+		__pci_epc_mem_init((epc), (phys_addr), (size), PAGE_SIZE)
+
 static inline void epc_set_drvdata(struct pci_epc *epc, void *data)
 {
 	dev_set_drvdata(&epc->dev, data);
@@ -135,7 +140,8 @@ void pci_epc_stop(struct pci_epc *epc);
 struct pci_epc *pci_epc_get(const char *epc_name);
 void pci_epc_put(struct pci_epc *epc);
 
-int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size);
+int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size,
+		       size_t page_size);
 void pci_epc_mem_exit(struct pci_epc *epc);
 void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
 				     phys_addr_t *phys_addr, size_t size);
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 0d529cb..60d551a 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -14,17 +14,10 @@
 
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+#include <linux/pci.h>
 
 struct pci_epf;
 
-enum pci_interrupt_pin {
-	PCI_INTERRUPT_UNKNOWN,
-	PCI_INTERRUPT_INTA,
-	PCI_INTERRUPT_INTB,
-	PCI_INTERRUPT_INTC,
-	PCI_INTERRUPT_INTD,
-};
-
 enum pci_barno {
 	BAR_0,
 	BAR_1,
@@ -149,6 +142,8 @@ static inline void *epf_get_drvdata(struct pci_epf *epf)
 	return dev_get_drvdata(&epf->dev);
 }
 
+const struct pci_epf_device_id *
+pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf);
 struct pci_epf *pci_epf_create(const char *name);
 void pci_epf_destroy(struct pci_epf *epf);
 int __pci_epf_register_driver(struct pci_epf_driver *driver,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index da05e5d..f68c58a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -102,6 +102,28 @@ enum {
 	DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES,
 };
 
+/**
+ * enum pci_interrupt_pin - PCI INTx interrupt values
+ * @PCI_INTERRUPT_UNKNOWN: Unknown or unassigned interrupt
+ * @PCI_INTERRUPT_INTA: PCI INTA pin
+ * @PCI_INTERRUPT_INTB: PCI INTB pin
+ * @PCI_INTERRUPT_INTC: PCI INTC pin
+ * @PCI_INTERRUPT_INTD: PCI INTD pin
+ *
+ * Corresponds to values for legacy PCI INTx interrupts, as can be found in the
+ * PCI_INTERRUPT_PIN register.
+ */
+enum pci_interrupt_pin {
+	PCI_INTERRUPT_UNKNOWN,
+	PCI_INTERRUPT_INTA,
+	PCI_INTERRUPT_INTB,
+	PCI_INTERRUPT_INTC,
+	PCI_INTERRUPT_INTD,
+};
+
+/* The number of legacy PCI INTx interrupts */
+#define PCI_NUM_INTX	4
+
 /*
  * pci_power_t values must match the bits in the Capabilities PME_Support
  * and Control/Status PowerState fields in the Power Management capability.
@@ -453,6 +475,7 @@ struct pci_host_bridge {
 	void *release_data;
 	struct msi_controller *msi;
 	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
+	unsigned int no_ext_tags:1;		/* no Extended Tags */
 	/* Resource alignment requirements */
 	resource_size_t (*align_resource)(struct pci_dev *dev,
 			const struct resource *res,
@@ -847,7 +870,6 @@ char *pcibios_setup(char *str);
 resource_size_t pcibios_align_resource(void *, const struct resource *,
 				resource_size_t,
 				resource_size_t);
-void pcibios_update_irq(struct pci_dev *, int irq);
 
 /* Weak but can be overriden by arch */
 void pci_fixup_cardbus(struct pci_bus *);
@@ -1165,8 +1187,6 @@ void pci_assign_unassigned_bus_resources(struct pci_bus *bus);
 void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus);
 void pdev_enable_device(struct pci_dev *);
 int pci_enable_resources(struct pci_dev *, int mask);
-void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
-		    int (*)(const struct pci_dev *, u8, u8));
 void pci_assign_irq(struct pci_dev *dev);
 struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res);
 #define HAVE_PCI_REQ_REGIONS	2
@@ -1399,6 +1419,38 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 					      NULL);
 }
 
+/**
+ * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq
+ * @d: the INTx IRQ domain
+ * @node: the DT node for the device whose interrupt we're translating
+ * @intspec: the interrupt specifier data from the DT
+ * @intsize: the number of entries in @intspec
+ * @out_hwirq: pointer at which to write the hwirq number
+ * @out_type: pointer at which to write the interrupt type
+ *
+ * Translate a PCI INTx interrupt number from device tree in the range 1-4, as
+ * stored in the standard PCI_INTERRUPT_PIN register, to a value in the range
+ * 0-3 suitable for use in a 4 entry IRQ domain. That is, subtract one from the
+ * INTx value to obtain the hwirq number.
+ *
+ * Returns 0 on success, or -EINVAL if the interrupt specifier is out of range.
+ */
+static inline int pci_irqd_intx_xlate(struct irq_domain *d,
+				      struct device_node *node,
+				      const u32 *intspec,
+				      unsigned int intsize,
+				      unsigned long *out_hwirq,
+				      unsigned int *out_type)
+{
+	const u32 intx = intspec[0];
+
+	if (intx < PCI_INTERRUPT_INTA || intx > PCI_INTERRUPT_INTD)
+		return -EINVAL;
+
+	*out_hwirq = intx - PCI_INTERRUPT_INTA;
+	return 0;
+}
+
 #ifdef CONFIG_PCIEPORTBUS
 extern bool pcie_ports_disabled;
 extern bool pcie_ports_auto;
@@ -2064,7 +2116,7 @@ static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt)
 
 /**
  * pci_vpd_srdt_size - Extracts the Small Resource Data Type length
- * @lrdt: Pointer to the beginning of the Small Resource Data Type tag
+ * @srdt: Pointer to the beginning of the Small Resource Data Type tag
  *
  * Returns the extracted Small Resource Data Type length.
  */
@@ -2075,7 +2127,7 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt)
 
 /**
  * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item
- * @lrdt: Pointer to the beginning of the Small Resource Data Type tag
+ * @srdt: Pointer to the beginning of the Small Resource Data Type tag
  *
  * Returns the extracted Small Resource Data Type Tag Item.
  */
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index afcd130..e8f3f74 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -38,7 +38,7 @@ static inline void set_service_data(struct pcie_device *dev, void *data)
 	dev->priv_data = data;
 }
 
-static inline void* get_service_data(struct pcie_device *dev)
+static inline void *get_service_data(struct pcie_device *dev)
 {
 	return dev->priv_data;
 }
@@ -50,8 +50,8 @@ struct pcie_port_service_driver {
 	int (*suspend) (struct pcie_device *dev);
 	int (*resume) (struct pcie_device *dev);
 
-	/* Service Error Recovery Handler */
-	const struct pci_error_handlers *err_handler;
+	/* Device driver may resume normal operations */
+	void (*error_resume)(struct pci_dev *dev);
 
 	/* Link Reset Capability - AER service driver specific */
 	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 5012b52..7f11050 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -217,7 +217,6 @@ enum {
 	TRACE_EVENT_FL_CAP_ANY_BIT,
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
-	TRACE_EVENT_FL_WAS_ENABLED_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
 	TRACE_EVENT_FL_KPROBE_BIT,
 	TRACE_EVENT_FL_UPROBE_BIT,
@@ -229,9 +228,6 @@ enum {
  *  CAP_ANY	  - Any user can enable for perf
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
- *  WAS_ENABLED   - Set and stays set when an event was ever enabled
- *                    (used for module unloading, if a module event is enabled,
- *                     it is best to clear the buffers that used it).
  *  TRACEPOINT    - Event is a tracepoint
  *  KPROBE        - Event is a kprobe
  *  UPROBE        - Event is a uprobe
@@ -241,7 +237,6 @@ enum {
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
-	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
 	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
@@ -306,6 +301,7 @@ enum {
 	EVENT_FILE_FL_TRIGGER_MODE_BIT,
 	EVENT_FILE_FL_TRIGGER_COND_BIT,
 	EVENT_FILE_FL_PID_FILTER_BIT,
+	EVENT_FILE_FL_WAS_ENABLED_BIT,
 };
 
 /*
@@ -321,6 +317,7 @@ enum {
  *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
  *  TRIGGER_COND  - When set, one or more triggers has an associated filter
  *  PID_FILTER    - When set, the event is filtered based on pid
+ *  WAS_ENABLED   - Set when enabled to know to clear trace on module removal
  */
 enum {
 	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -333,6 +330,7 @@ enum {
 	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
 	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
 	EVENT_FILE_FL_PID_FILTER	= (1 << EVENT_FILE_FL_PID_FILTER_BIT),
+	EVENT_FILE_FL_WAS_ENABLED	= (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
 };
 
 struct trace_event_file {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6cd63c1..8388875 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index c22d3eb..f8d5804 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -513,6 +513,7 @@
 #define  PCI_EXP_DEVSTA_URD	0x0008	/* Unsupported Request Detected */
 #define  PCI_EXP_DEVSTA_AUXPD	0x0010	/* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND	0x0020	/* Transactions Pending */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1	12	/* v1 endpoints without link end here */
 #define PCI_EXP_LNKCAP		12	/* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
@@ -556,7 +557,7 @@
 #define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
 #define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
 #define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1	20	/* v1 endpoints end here */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1	20	/* v1 endpoints with link end here */
 #define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
 #define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
 #define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
@@ -639,7 +640,7 @@
 #define  PCI_EXP_DEVCTL2_OBFF_MSGB_EN	0x4000	/* Enable OBFF Message type B */
 #define  PCI_EXP_DEVCTL2_OBFF_WAKE_EN	0x6000	/* OBFF using WAKE# signaling */
 #define PCI_EXP_DEVSTA2		42	/* Device Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	44	/* v2 endpoints end here */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2	44	/* v2 endpoints without link end here */
 #define PCI_EXP_LNKCAP2		44	/* Link Capabilities 2 */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB	0x00000002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB	0x00000004 /* Supported Speed 5.0GT/s */
@@ -647,6 +648,7 @@
 #define  PCI_EXP_LNKCAP2_CROSSLINK	0x00000100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
 #define PCI_EXP_LNKSTA2		50	/* Link Status 2 */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	52	/* v2 endpoints with link end here */
 #define PCI_EXP_SLTCAP2		52	/* Slot Capabilities 2 */
 #define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
 #define PCI_EXP_SLTSTA2		58	/* Slot Status 2 */
@@ -733,23 +735,17 @@
 #define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
 #define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
-/* Correctable Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001
-/* Non-fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002
-/* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004
+#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001 /* Correctable Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002 /* Non-Fatal Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004 /* Fatal Err Reporting Enable */
 #define PCI_ERR_ROOT_STATUS	48
-#define PCI_ERR_ROOT_COR_RCV		0x00000001	/* ERR_COR Received */
-/* Multi ERR_COR Received */
-#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002
-/* ERR_FATAL/NONFATAL Received */
-#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004
-/* Multi ERR_FATAL/NONFATAL Received */
-#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008
-#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010	/* First Fatal */
-#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020	/* Non-Fatal Received */
-#define PCI_ERR_ROOT_FATAL_RCV		0x00000040	/* Fatal Received */
+#define PCI_ERR_ROOT_COR_RCV		0x00000001 /* ERR_COR Received */
+#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002 /* Multiple ERR_COR */
+#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004 /* ERR_FATAL/NONFATAL */
+#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008 /* Multiple FATAL/NONFATAL */
+#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010 /* First UNC is Fatal */
+#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020 /* Non-Fatal Received */
+#define PCI_ERR_ROOT_FATAL_RCV		0x00000040 /* Fatal Received */
 #define PCI_ERR_ROOT_ERR_SRC	52	/* Error Source Identification */
 
 /* Virtual Channel */
@@ -967,6 +963,7 @@
 #define  PCI_EXP_DPC_CAP_RP_EXT		0x20	/* Root Port Extensions for DPC */
 #define  PCI_EXP_DPC_CAP_POISONED_TLP	0x40	/* Poisoned TLP Egress Blocking Supported */
 #define  PCI_EXP_DPC_CAP_SW_TRIGGER	0x80	/* Software Triggering Supported */
+#define  PCI_EXP_DPC_RP_PIO_LOG_SIZE	0xF00	/* RP PIO log size */
 #define  PCI_EXP_DPC_CAP_DL_ACTIVE	0x1000	/* ERR_COR signal on DL_Active supported */
 
 #define PCI_EXP_DPC_CTL			6	/* DPC control */
@@ -980,6 +977,15 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+#define PCI_EXP_DPC_RP_PIO_STATUS	 0x0C	/* RP PIO Status */
+#define PCI_EXP_DPC_RP_PIO_MASK		 0x10	/* RP PIO MASK */
+#define PCI_EXP_DPC_RP_PIO_SEVERITY	 0x14	/* RP PIO Severity */
+#define PCI_EXP_DPC_RP_PIO_SYSERROR	 0x18	/* RP PIO SysError */
+#define PCI_EXP_DPC_RP_PIO_EXCEPTION	 0x1C	/* RP PIO Exception */
+#define PCI_EXP_DPC_RP_PIO_HEADER_LOG	 0x20	/* RP PIO Header Log */
+#define PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG	 0x30	/* RP PIO ImpSpec Log */
+#define PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG 0x34	/* RP PIO TLP Prefix Log */
+
 /* Precision Time Measurement */
 #define PCI_PTM_CAP			0x04	    /* PTM Capability */
 #define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 96cea88..6abfafd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2828,13 +2828,14 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 
 	if (!command || !ftrace_enabled) {
 		/*
-		 * If these are per_cpu ops, they still need their
-		 * per_cpu field freed. Since, function tracing is
+		 * If these are dynamic or per_cpu ops, they still
+		 * need their data freed. Since, function tracing is
 		 * not currently active, we can just free them
 		 * without synchronizing all CPUs.
 		 */
-		if (ops->flags & FTRACE_OPS_FL_PER_CPU)
-			per_cpu_ops_free(ops);
+		if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU))
+			goto free_ops;
+
 		return 0;
 	}
 
@@ -2900,6 +2901,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 		if (IS_ENABLED(CONFIG_PREEMPT))
 			synchronize_rcu_tasks();
 
+ free_ops:
 		arch_ftrace_trampoline_free(ops);
 
 		if (ops->flags & FTRACE_OPS_FL_PER_CPU)
@@ -5690,10 +5692,51 @@ static int referenced_filters(struct dyn_ftrace *rec)
 	return cnt;
 }
 
+static void
+clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash)
+{
+	struct ftrace_func_entry *entry;
+	struct dyn_ftrace *rec;
+	int i;
+
+	if (ftrace_hash_empty(hash))
+		return;
+
+	for (i = 0; i < pg->index; i++) {
+		rec = &pg->records[i];
+		entry = __ftrace_lookup_ip(hash, rec->ip);
+		/*
+		 * Do not allow this rec to match again.
+		 * Yeah, it may waste some memory, but will be removed
+		 * if/when the hash is modified again.
+		 */
+		if (entry)
+			entry->ip = 0;
+	}
+}
+
+/* Clear any records from hashs */
+static void clear_mod_from_hashes(struct ftrace_page *pg)
+{
+	struct trace_array *tr;
+
+	mutex_lock(&trace_types_lock);
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		if (!tr->ops || !tr->ops->func_hash)
+			continue;
+		mutex_lock(&tr->ops->func_hash->regex_lock);
+		clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash);
+		clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash);
+		mutex_unlock(&tr->ops->func_hash->regex_lock);
+	}
+	mutex_unlock(&trace_types_lock);
+}
+
 void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page **last_pg;
+	struct ftrace_page *tmp_page = NULL;
 	struct ftrace_page *pg;
 	int order;
 
@@ -5723,14 +5766,25 @@ void ftrace_release_mod(struct module *mod)
 
 			ftrace_update_tot_cnt -= pg->index;
 			*last_pg = pg->next;
-			order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-			free_pages((unsigned long)pg->records, order);
-			kfree(pg);
+
+			pg->next = tmp_page;
+			tmp_page = pg;
 		} else
 			last_pg = &pg->next;
 	}
  out_unlock:
 	mutex_unlock(&ftrace_lock);
+
+	for (pg = tmp_page; pg; pg = tmp_page) {
+
+		/* Needs to be called outside of ftrace_lock */
+		clear_mod_from_hashes(pg);
+
+		order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+		free_pages((unsigned long)pg->records, order);
+		tmp_page = pg->next;
+		kfree(pg);
+	}
 }
 
 void ftrace_module_enable(struct module *mod)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 44004d8..5360b7a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1702,6 +1702,9 @@ void tracing_reset_all_online_cpus(void)
 	struct trace_array *tr;
 
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		if (!tr->clear_trace)
+			continue;
+		tr->clear_trace = false;
 		tracing_reset_online_cpus(&tr->trace_buffer);
 #ifdef CONFIG_TRACER_MAX_TRACE
 		tracing_reset_online_cpus(&tr->max_buffer);
@@ -2799,11 +2802,17 @@ static char *get_trace_buf(void)
 	if (!buffer || buffer->nesting >= 4)
 		return NULL;
 
-	return &buffer->buffer[buffer->nesting++][0];
+	buffer->nesting++;
+
+	/* Interrupts must see nesting incremented before we use the buffer */
+	barrier();
+	return &buffer->buffer[buffer->nesting][0];
 }
 
 static void put_trace_buf(void)
 {
+	/* Don't let the decrement of nesting leak before this */
+	barrier();
 	this_cpu_dec(trace_percpu_buffer->nesting);
 }
 
@@ -6220,7 +6229,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 	tracing_reset_online_cpus(&tr->trace_buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
+	if (tr->max_buffer.buffer)
 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
 	tracing_reset_online_cpus(&tr->max_buffer);
 #endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 490ba22..fb5d54d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -245,6 +245,7 @@ struct trace_array {
 	int			stop_count;
 	int			clock_id;
 	int			nr_topts;
+	bool			clear_trace;
 	struct tracer		*current_trace;
 	unsigned int		trace_flags;
 	unsigned char		trace_flags_index[TRACE_FLAGS_MAX_SIZE];
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 36132f9..8746839 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -406,7 +406,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 
 			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
 				tracing_stop_tgid_record();
-				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
+				clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
 			}
 
 			call->class->reg(call, TRACE_REG_UNREGISTER, file);
@@ -466,7 +466,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 			set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 
 			/* WAS_ENABLED gets set but never cleared. */
-			call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
+			set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
 		}
 		break;
 	}
@@ -2058,6 +2058,10 @@ static void event_remove(struct trace_event_call *call)
 	do_for_each_event_file(tr, file) {
 		if (file->event_call != call)
 			continue;
+
+		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
+			tr->clear_trace = true;
+
 		ftrace_event_enable_disable(file, 0);
 		/*
 		 * The do_for_each_event_file() is
@@ -2396,15 +2400,11 @@ static void trace_module_add_events(struct module *mod)
 static void trace_module_remove_events(struct module *mod)
 {
 	struct trace_event_call *call, *p;
-	bool clear_trace = false;
 
 	down_write(&trace_event_sem);
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
-		if (call->mod == mod) {
-			if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
-				clear_trace = true;
+		if (call->mod == mod)
 			__trace_remove_event_call(call);
-		}
 	}
 	up_write(&trace_event_sem);
 
@@ -2416,8 +2416,7 @@ static void trace_module_remove_events(struct module *mod)
 	 * over from this module may be passed to the new module events and
 	 * unexpected results may occur.
 	 */
-	if (clear_trace)
-		tracing_reset_all_online_cpus();
+	tracing_reset_all_online_cpus();
 }
 
 static int trace_module_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index cb917ce..b17ec64 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -273,7 +273,7 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt)
 		goto out_free;
 	if (cnt > 1) {
 		if (trace_selftest_test_global_cnt == 0)
-			goto out;
+			goto out_free;
 	}
 	if (trace_selftest_test_dyn_cnt == 0)
 		goto out_free;
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index dcc0d5f..5e975fe 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -194,15 +194,6 @@
 
 ifdef REGENERATE_PARSERS
 
-# GPERF
-# ---------------------------------------------------------------------------
-quiet_cmd_gperf = GPERF $@
-      cmd_gperf = gperf -t --output-file $@ -a -C -E -g -k 1,3,$$ -p -t $<
-
-.PRECIOUS: $(src)/%.hash.c_shipped
-$(src)/%.hash.c_shipped: $(src)/%.gperf
-	$(call cmd,gperf)
-
 # LEX
 # ---------------------------------------------------------------------------
 LEX_PREFIX = $(if $(LEX_PREFIX_${baseprereq}),$(LEX_PREFIX_${baseprereq}),yy)
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
index 5dc25ee..d29c774 100644
--- a/scripts/genksyms/lex.l
+++ b/scripts/genksyms/lex.l
@@ -292,7 +292,7 @@
     case ST_TYPEOF_1:
       if (token == IDENT)
 	{
-	  if (is_reserved_word(yytext, yyleng)
+	  if (is_reserved_word(yytext, yyleng) >= 0
 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
 	    {
 	      yyless(0);
diff --git a/scripts/genksyms/lex.lex.c_shipped b/scripts/genksyms/lex.lex.c_shipped
index d5a7474..ba2fda8 100644
--- a/scripts/genksyms/lex.lex.c_shipped
+++ b/scripts/genksyms/lex.lex.c_shipped
@@ -2101,7 +2101,7 @@
     case ST_TYPEOF_1:
       if (token == IDENT)
 	{
-	  if (is_reserved_word(yytext, yyleng)
+	  if (is_reserved_word(yytext, yyleng) >= 0
 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
 	    {
 	      yyless(0);
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index ad54a58..9074b47 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -173,6 +173,7 @@ int main(int argc, char **argv)
 			"\t-D <dev>		PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
 			"\t-b <bar num>		BAR test (bar number between 0..5)\n"
 			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
+			"\t-l			Legacy IRQ test\n"
 			"\t-r			Read buffer test\n"
 			"\t-w			Write buffer test\n"
 			"\t-c			Copy buffer test\n"
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index f633561..901b85ea 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -367,11 +367,11 @@ static void launch_tests(void)
 
 	/* Icebp traps */
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	check_success("Test icebp");
+	check_success("Test icebp\n");
 
 	/* Int 3 traps */
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	check_success("Test int 3 trap");
+	check_success("Test int 3 trap\n");
 
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
 }
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 763f37f..cf67784 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -1,7 +1,6 @@
 #define _GNU_SOURCE
 
 #include <cap-ng.h>
-#include <err.h>
 #include <linux/capability.h>
 #include <stdbool.h>
 #include <string.h>
@@ -18,6 +17,8 @@
 #include <sys/prctl.h>
 #include <sys/stat.h>
 
+#include "../kselftest.h"
+
 #ifndef PR_CAP_AMBIENT
 #define PR_CAP_AMBIENT			47
 # define PR_CAP_AMBIENT_IS_SET		1
@@ -27,6 +28,7 @@
 #endif
 
 static int nerrs;
+static pid_t mpid;	/*  main() pid is used to avoid duplicate test counts */
 
 static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
@@ -36,29 +38,32 @@ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list
 	int buf_len;
 
 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-	if (buf_len < 0) {
-		err(1, "vsnprintf failed");
-	}
-	if (buf_len >= sizeof(buf)) {
-		errx(1, "vsnprintf output truncated");
-	}
+	if (buf_len < 0)
+		ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno));
+
+	if (buf_len >= sizeof(buf))
+		ksft_exit_fail_msg("vsnprintf output truncated\n");
+
 
 	fd = open(filename, O_WRONLY);
 	if (fd < 0) {
 		if ((errno == ENOENT) && enoent_ok)
 			return;
-		err(1, "open of %s failed", filename);
+		ksft_exit_fail_msg("open of %s failed - %s\n",
+					filename, strerror(errno));
 	}
 	written = write(fd, buf, buf_len);
 	if (written != buf_len) {
 		if (written >= 0) {
-			errx(1, "short write to %s", filename);
+			ksft_exit_fail_msg("short write to %s\n", filename);
 		} else {
-			err(1, "write to %s failed", filename);
+			ksft_exit_fail_msg("write to %s failed - %s\n",
+						filename, strerror(errno));
 		}
 	}
 	if (close(fd) != 0) {
-		err(1, "close of %s failed", filename);
+		ksft_exit_fail_msg("close of %s failed - %s\n",
+					filename, strerror(errno));
 	}
 }
 
@@ -95,11 +100,12 @@ static bool create_and_enter_ns(uid_t inner_uid)
 	 */
 
 	if (unshare(CLONE_NEWNS) == 0) {
-		printf("[NOTE]\tUsing global UIDs for tests\n");
+		ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
 		if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
-			err(1, "PR_SET_KEEPCAPS");
+			ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
+						strerror(errno));
 		if (setresuid(inner_uid, inner_uid, -1) != 0)
-			err(1, "setresuid");
+			ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
 
 		// Re-enable effective caps
 		capng_get_caps_process();
@@ -107,22 +113,24 @@ static bool create_and_enter_ns(uid_t inner_uid)
 			if (capng_have_capability(CAPNG_PERMITTED, i))
 				capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
 		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-			err(1, "capng_apply");
+			ksft_exit_fail_msg(
+					"capng_apply - %s\n", strerror(errno));
 
 		have_outer_privilege = true;
 	} else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
-		printf("[NOTE]\tUsing a user namespace for tests\n");
+		ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n");
 		maybe_write_file("/proc/self/setgroups", "deny");
 		write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
 		write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
 
 		have_outer_privilege = false;
 	} else {
-		errx(1, "must be root or be able to create a userns");
+		ksft_exit_skip("must be root or be able to create a userns\n");
 	}
 
 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
-		err(1, "remount everything private");
+		ksft_exit_fail_msg("remount everything private - %s\n",
+					strerror(errno));
 
 	return have_outer_privilege;
 }
@@ -131,20 +139,22 @@ static void chdir_to_tmpfs(void)
 {
 	char cwd[PATH_MAX];
 	if (getcwd(cwd, sizeof(cwd)) != cwd)
-		err(1, "getcwd");
+		ksft_exit_fail_msg("getcwd - %s\n", strerror(errno));
 
 	if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
-		err(1, "mount private tmpfs");
+		ksft_exit_fail_msg("mount private tmpfs - %s\n",
+					strerror(errno));
 
 	if (chdir(cwd) != 0)
-		err(1, "chdir to private tmpfs");
+		ksft_exit_fail_msg("chdir to private tmpfs - %s\n",
+					strerror(errno));
 }
 
 static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
 {
 	int from = openat(fromfd, fromname, O_RDONLY);
 	if (from == -1)
-		err(1, "open copy source");
+		ksft_exit_fail_msg("open copy source - %s\n", strerror(errno));
 
 	int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
 
@@ -154,10 +164,11 @@ static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
 		if (sz == 0)
 			break;
 		if (sz < 0)
-			err(1, "read");
+			ksft_exit_fail_msg("read - %s\n", strerror(errno));
 
 		if (write(to, buf, sz) != sz)
-			err(1, "write");	/* no short writes on tmpfs */
+			/* no short writes on tmpfs */
+			ksft_exit_fail_msg("write - %s\n", strerror(errno));
 	}
 
 	close(from);
@@ -174,18 +185,20 @@ static bool fork_wait(void)
 		int status;
 		if (waitpid(child, &status, 0) != child ||
 		    !WIFEXITED(status)) {
-			printf("[FAIL]\tChild died\n");
+			ksft_print_msg("Child died\n");
 			nerrs++;
 		} else if (WEXITSTATUS(status) != 0) {
-			printf("[FAIL]\tChild failed\n");
+			ksft_print_msg("Child failed\n");
 			nerrs++;
 		} else {
-			printf("[OK]\tChild succeeded\n");
+			/* don't print this message for mpid */
+			if (getpid() != mpid)
+				ksft_test_result_pass("Passed\n");
 		}
-
 		return false;
 	} else {
-		err(1, "fork");
+		ksft_exit_fail_msg("fork - %s\n", strerror(errno));
+		return false;
 	}
 }
 
@@ -195,7 +208,7 @@ static void exec_other_validate_cap(const char *name,
 	execl(name, name, (eff ? "1" : "0"),
 	      (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
 	      NULL);
-	err(1, "execl");
+	ksft_exit_fail_msg("execl - %s\n", strerror(errno));
 }
 
 static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
@@ -209,7 +222,8 @@ static int do_tests(int uid, const char *our_path)
 
 	int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
 	if (ourpath_fd == -1)
-		err(1, "open '%s'", our_path);
+		ksft_exit_fail_msg("open '%s' - %s\n",
+					our_path, strerror(errno));
 
 	chdir_to_tmpfs();
 
@@ -221,30 +235,30 @@ static int do_tests(int uid, const char *our_path)
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_suidroot");
 		if (chown("validate_cap_suidroot", 0, -1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_suidnonroot");
 		if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_sgidroot");
 		if (chown("validate_cap_sgidroot", -1, 0) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_sgidnonroot");
 		if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 	}
 
 	capng_get_caps_process();
@@ -252,147 +266,162 @@ static int do_tests(int uid, const char *our_path)
 	/* Make sure that i starts out clear */
 	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 
 	if (uid == 0) {
-		printf("[RUN]\tRoot => ep\n");
+		ksft_print_msg("[RUN]\tRoot => ep\n");
 		if (fork_wait())
 			exec_validate_cap(true, true, false, false);
 	} else {
-		printf("[RUN]\tNon-root => no caps\n");
+		ksft_print_msg("[RUN]\tNon-root => no caps\n");
 		if (fork_wait())
 			exec_validate_cap(false, false, false, false);
 	}
 
-	printf("[OK]\tCheck cap_ambient manipulation rules\n");
+	ksft_print_msg("Check cap_ambient manipulation rules\n");
 
 	/* We should not be able to add ambient caps yet. */
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
 		if (errno == EINVAL)
-			printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n");
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE isn't supported\n");
 		else
-			printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
 	capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
 	capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have succeeded\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n");
+	ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n");
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n");
+		ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n");
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_CLEAR_ALL");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n",
+					strerror(errno));
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_RAISE");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
 
 	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tDropping I should have dropped A\n");
+		ksft_test_result_fail("Dropping I should have dropped A\n");
 		return 1;
 	}
 
-	printf("[OK]\tBasic manipulation appears to work\n");
+	ksft_test_result_pass("Basic manipulation appears to work\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (uid == 0) {
-		printf("[RUN]\tRoot +i => eip\n");
+		ksft_print_msg("[RUN]\tRoot +i => eip\n");
 		if (fork_wait())
 			exec_validate_cap(true, true, true, false);
 	} else {
-		printf("[RUN]\tNon-root +i => i\n");
+		ksft_print_msg("[RUN]\tNon-root +i => i\n");
 		if (fork_wait())
 			exec_validate_cap(false, false, true, false);
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_RAISE");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
 
-	printf("[RUN]\tUID %d +ia => eipa\n", uid);
+	ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid);
 	if (fork_wait())
 		exec_validate_cap(true, true, true, true);
 
 	/* The remaining tests need real privilege */
 
 	if (!have_outer_privilege) {
-		printf("[SKIP]\tSUID/SGID tests (needs privilege)\n");
+		ksft_test_result_skip("SUID/SGID tests (needs privilege)\n");
 		goto done;
 	}
 
 	if (uid == 0) {
-		printf("[RUN]\tRoot +ia, suidroot => eipa\n");
+		ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_suidroot",
 						true, true, true, true);
 
-		printf("[RUN]\tRoot +ia, suidnonroot => ip\n");
+		ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_suidnonroot",
 						false, true, true, false);
 
-		printf("[RUN]\tRoot +ia, sgidroot => eipa\n");
+		ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						true, true, true, true);
 
 		if (fork_wait()) {
-			printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+			ksft_print_msg(
+				"[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
 			if (setresgid(1, 1, 1) != 0)
-				err(1, "setresgid");
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						true, true, true, false);
 		}
 
-		printf("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+		ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_sgidnonroot",
 						true, true, true, false);
 	} else {
-		printf("[RUN]\tNon-root +ia, sgidnonroot => i\n");
-		exec_other_validate_cap("./validate_cap_sgidnonroot",
+		ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_sgidnonroot",
 					false, false, true, false);
 
 		if (fork_wait()) {
-			printf("[RUN]\tNon-root +ia, sgidroot => i\n");
+			ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n");
 			if (setresgid(1, 1, 1) != 0)
-				err(1, "setresgid");
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						false, false, true, false);
 		}
 	}
 
 done:
+	ksft_print_cnts();
 	return nerrs ? 1 : 0;
 }
 
@@ -400,23 +429,29 @@ int main(int argc, char **argv)
 {
 	char *tmp1, *tmp2, *our_path;
 
+	ksft_print_header();
+
 	/* Find our path */
 	tmp1 = strdup(argv[0]);
 	if (!tmp1)
-		err(1, "strdup");
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
 	tmp2 = dirname(tmp1);
 	our_path = strdup(tmp2);
 	if (!our_path)
-		err(1, "strdup");
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
 	free(tmp1);
 
+	mpid = getpid();
+
 	if (fork_wait()) {
-		printf("[RUN]\t+++ Tests with uid == 0 +++\n");
+		ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
 		return do_tests(0, our_path);
 	}
 
+	ksft_print_msg("==================================================\n");
+
 	if (fork_wait()) {
-		printf("[RUN]\t+++ Tests with uid != 0 +++\n");
+		ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
 		return do_tests(1, our_path);
 	}
 
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index dd3c45f..694cd73 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -1,5 +1,4 @@
 #include <cap-ng.h>
-#include <err.h>
 #include <linux/capability.h>
 #include <stdbool.h>
 #include <string.h>
@@ -7,6 +6,8 @@
 #include <sys/prctl.h>
 #include <sys/auxv.h>
 
+#include "../kselftest.h"
+
 #ifndef PR_CAP_AMBIENT
 #define PR_CAP_AMBIENT			47
 # define PR_CAP_AMBIENT_IS_SET		1
@@ -25,8 +26,10 @@ static bool bool_arg(char **argv, int i)
 		return false;
 	else if (!strcmp(argv[i], "1"))
 		return true;
-	else
-		errx(1, "wrong argv[%d]", i);
+	else {
+		ksft_exit_fail_msg("wrong argv[%d]\n", i);
+		return false;
+	}
 }
 
 int main(int argc, char **argv)
@@ -39,7 +42,7 @@ int main(int argc, char **argv)
 	 */
 
 	if (argc != 5)
-		errx(1, "wrong argc");
+		ksft_exit_fail_msg("wrong argc\n");
 
 #ifdef HAVE_GETAUXVAL
 	if (getauxval(AT_SECURE))
@@ -51,23 +54,26 @@ int main(int argc, char **argv)
 	capng_get_caps_process();
 
 	if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
-		printf("[FAIL]\tWrong effective state%s\n", atsec);
+		ksft_print_msg("Wrong effective state%s\n", atsec);
 		return 1;
 	}
+
 	if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
-		printf("[FAIL]\tWrong permitted state%s\n", atsec);
+		ksft_print_msg("Wrong permitted state%s\n", atsec);
 		return 1;
 	}
+
 	if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
-		printf("[FAIL]\tWrong inheritable state%s\n", atsec);
+		ksft_print_msg("Wrong inheritable state%s\n", atsec);
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
-		printf("[FAIL]\tWrong ambient state%s\n", atsec);
+		ksft_print_msg("Wrong ambient state%s\n", atsec);
 		return 1;
 	}
 
-	printf("[OK]\tCapabilities after execve were correct\n");
+	ksft_print_msg("%s: Capabilities after execve were correct\n",
+			"validate_cap:");
 	return 0;
 }
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index 98b1d65..b18b253 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -28,6 +28,12 @@
 	echo "CPU online/offline summary:"
 	online_cpus=`cat $SYSFS/devices/system/cpu/online`
 	online_max=${online_cpus##*-}
+
+	if [[ "$online_cpus" = "$online_max" ]]; then
+		echo "$msg: since there is only one cpu: $online_cpus"
+		exit 0
+	fi
+
 	echo -e "\t Cpus in online state: $online_cpus"
 
 	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
@@ -89,8 +95,10 @@
 
 	if ! online_cpu $cpu; then
 		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
 	elif ! cpu_is_online $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
@@ -100,8 +108,10 @@
 
 	if online_cpu $cpu 2> /dev/null; then
 		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
 	elif ! cpu_is_offline $cpu; then
 		echo $FUNCNAME $cpu: unexpected online >&2
+		exit 1
 	fi
 }
 
@@ -111,8 +121,10 @@
 
 	if ! offline_cpu $cpu; then
 		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
 	elif ! cpu_is_offline $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
@@ -122,8 +134,10 @@
 
 	if offline_cpu $cpu 2> /dev/null; then
 		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
 	elif ! cpu_is_online $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 14a03ea..abc706c 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -8,15 +8,18 @@
 # Released under the terms of the GPL v2.
 
 usage() { # errno [message]
-[ "$2" ] && echo $2
+[ ! -z "$2" ] && echo $2
 echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
 echo " Options:"
 echo "		-h|--help  Show help message"
 echo "		-k|--keep  Keep passed test logs"
 echo "		-v|--verbose Increase verbosity of test messages"
 echo "		-vv        Alias of -v -v (Show all results in stdout)"
+echo "		-vvv       Alias of -v -v -v (Show all commands immediately)"
+echo "		--fail-unsupported Treat UNSUPPORTED as a failure"
 echo "		-d|--debug Debug mode (trace all shell commands)"
 echo "		-l|--logdir <dir> Save logs on the <dir>"
+echo "		            If <dir> is -, all logs output in console only"
 exit $1
 }
 
@@ -47,7 +50,7 @@
   local OPT_TEST_CASES=
   local OPT_TEST_DIR=
 
-  while [ "$1" ]; do
+  while [ ! -z "$1" ]; do
     case "$1" in
     --help|-h)
       usage 0
@@ -56,15 +59,20 @@
       KEEP_LOG=1
       shift 1
     ;;
-    --verbose|-v|-vv)
+    --verbose|-v|-vv|-vvv)
       VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
       shift 1
     ;;
     --debug|-d)
       DEBUG=1
       shift 1
     ;;
+    --fail-unsupported)
+      UNSUPPORTED_RESULT=1
+      shift 1
+    ;;
     --logdir|-l)
       LOG_DIR=$2
       shift 2
@@ -88,7 +96,7 @@
     ;;
     esac
   done
-  if [ "$OPT_TEST_CASES" ]; then
+  if [ ! -z "$OPT_TEST_CASES" ]; then
     TEST_CASES=$OPT_TEST_CASES
   fi
 }
@@ -108,6 +116,7 @@
 KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
+UNSUPPORTED_RESULT=0
 # Parse command-line options
 parse_opts $*
 
@@ -119,14 +128,20 @@
 fi
 
 # Preparing logs
-LOG_FILE=$LOG_DIR/ftracetest.log
-mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
-date > $LOG_FILE
+if [ "x$LOG_DIR" = "x-" ]; then
+  LOG_FILE=
+  date
+else
+  LOG_FILE=$LOG_DIR/ftracetest.log
+  mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+  date > $LOG_FILE
+fi
+
 prlog() { # messages
-  echo "$@" | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
 }
 catlog() { #file
-  cat $1 | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
 }
 prlog "=== Ftrace unit tests ==="
 
@@ -187,7 +202,7 @@
     $UNSUPPORTED)
       prlog "	[UNSUPPORTED]"
       UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
-      return 1 # this is not a bug, but the result should be reported.
+      return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
       prlog "	[XFAIL]"
@@ -247,12 +262,20 @@
 # Run one test case
 run_test() { # testfile
   local testname=`basename $1`
-  local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+  if [ ! -z "$LOG_FILE" ] ; then
+    local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+  else
+    local testlog=/proc/self/fd/1
+  fi
   export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
   testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ $VERBOSE -ge 2 ]; then
+  if [ -z "$LOG_FILE" ]; then
+    __run_test $1 2>&1
+  elif [ $VERBOSE -ge 3 ]; then
+    __run_test $1 | tee -a $testlog 2>&1
+  elif [ $VERBOSE -eq 2 ]; then
     __run_test $1 2>> $testlog | tee -a $testlog
   else
     __run_test $1 >> $testlog 2>&1
@@ -260,9 +283,9 @@
   eval_result $SIG_RESULT
   if [ $? -eq 0 ]; then
     # Remove test log if the test was done as it was expected.
-    [ $KEEP_LOG -eq 0 ] && rm $testlog
+    [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog
   else
-    [ $VERBOSE -ge 1 ] && catlog $testlog
+    [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
     TOTAL_RESULT=1
   fi
   rm -rf $TMPDIR
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index d24ab74..54cd5c4 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -394,9 +394,11 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test requeue functionality\n", basename(argv[0]));
-	printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
-	       broadcast, locked, owner, timeout_ns);
+	ksft_print_header();
+	ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
+	ksft_print_msg(
+		"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+		broadcast, locked, owner, timeout_ns);
 
 	/*
 	 * FIXME: unit_test is obsolete now that we parse options and the
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index e0a798a..08187a1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -78,7 +78,8 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Detect mismatched requeue_pi operations\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
 	       basename(argv[0]));
 
 	if (pthread_create(&child, NULL, blocking_child, NULL)) {
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index 982f835..f0542a3 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -143,9 +143,10 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test signal handling during requeue_pi\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test signal handling during requeue_pi\n",
 	       basename(argv[0]));
-	printf("\tArguments: <none>\n");
+	ksft_print_msg("\tArguments: <none>\n");
 
 	sa.sa_handler = handle_signal;
 	sigemptyset(&sa.sa_mask);
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index bdc48dc..6216de8 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -97,8 +97,10 @@ int main(int argc, char **argv)
 		}
 	}
 
-	printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
-	       basename(argv[0]));
+	ksft_print_header();
+	ksft_print_msg(
+		"%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+		basename(argv[0]));
 
 	ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
 	if (ret < 0) {
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 6aadd56..bab3dfe 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -68,9 +68,10 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Block on a futex and wait for timeout\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
 	       basename(argv[0]));
-	printf("\tArguments: timeout=%ldns\n", timeout_ns);
+	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
 
 	/* initialize timeout */
 	to.tv_sec = 0;
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index d237a8b..2697532 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -99,7 +99,8 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
 
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 9a2c56f..da15a63 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -64,7 +64,8 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
 	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
index 4e79449..0198964 100644
--- a/tools/testing/selftests/futex/include/logging.h
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -109,22 +109,20 @@ void log_verbosity(int level)
  */
 void print_result(const char *test_name, int ret)
 {
-	const char *result = "Unknown return code";
-
 	switch (ret) {
 	case RET_PASS:
-		ksft_inc_pass_cnt();
-		result = PASS;
-		break;
+		ksft_test_result_pass("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	case RET_ERROR:
-		result = ERROR;
-		break;
+		ksft_test_result_error("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	case RET_FAIL:
-		ksft_inc_fail_cnt();
-		result = FAIL;
-		break;
+		ksft_test_result_fail("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	}
-	printf("selftests: %s [%s]\n", test_name, result);
 }
 
 /* log level macros */
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 08e90c2..1ae565e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -19,7 +19,8 @@
 #define KSFT_FAIL  1
 #define KSFT_XFAIL 2
 #define KSFT_XPASS 3
-#define KSFT_SKIP  4
+/* Treat skip as pass */
+#define KSFT_SKIP  KSFT_PASS
 
 /* counters */
 struct ksft_count {
@@ -28,6 +29,7 @@ struct ksft_count {
 	unsigned int ksft_xfail;
 	unsigned int ksft_xpass;
 	unsigned int ksft_xskip;
+	unsigned int ksft_error;
 };
 
 static struct ksft_count ksft_cnt;
@@ -36,7 +38,7 @@ static inline int ksft_test_num(void)
 {
 	return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
 		ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
-		ksft_cnt.ksft_xskip;
+		ksft_cnt.ksft_xskip + ksft_cnt.ksft_error;
 }
 
 static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; }
@@ -44,6 +46,14 @@ static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; }
 static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; }
 static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; }
 static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; }
+static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; }
+
+static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; }
+static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; }
+static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; }
+static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; }
+static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; }
+static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
 
 static inline void ksft_print_header(void)
 {
@@ -52,6 +62,10 @@ static inline void ksft_print_header(void)
 
 static inline void ksft_print_cnts(void)
 {
+	printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+		ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
+		ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
+		ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
 	printf("1..%d\n", ksft_test_num());
 }
 
@@ -101,6 +115,18 @@ static inline void ksft_test_result_skip(const char *msg, ...)
 	va_end(args);
 }
 
+static inline void ksft_test_result_error(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_error++;
+
+	va_start(args, msg);
+	printf("not ok %d # error ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
 static inline int ksft_exit_pass(void)
 {
 	ksft_print_cnts();
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index c56f72e..e81bd28 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -51,6 +51,9 @@
 #define __KSELFTEST_HARNESS_H
 
 #define _GNU_SOURCE
+#include <asm/types.h>
+#include <errno.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -84,6 +87,14 @@
  * E.g., #define TH_LOG_ENABLED 1
  *
  * If no definition is provided, logging is enabled by default.
+ *
+ * If there is no way to print an error message for the process running the
+ * test (e.g. not allowed to write to stderr), it is still possible to get the
+ * ASSERT_* number for which the test failed.  This behavior can be enabled by
+ * writing `_metadata->no_print = true;` before the check sequence that is
+ * unable to print.  When an error occur, instead of printing an error message
+ * and calling `abort(3)`, the test process call `_exit(2)` with the assert
+ * number as argument, which is then printed by the parent process.
  */
 #define TH_LOG(fmt, ...) do { \
 	if (TH_LOG_ENABLED) \
@@ -555,12 +566,18 @@
  * return while still providing an optional block to the API consumer.
  */
 #define OPTIONAL_HANDLER(_assert) \
-	for (; _metadata->trigger;  _metadata->trigger = __bail(_assert))
+	for (; _metadata->trigger; _metadata->trigger = \
+			__bail(_assert, _metadata->no_print, _metadata->step))
+
+#define __INC_STEP(_metadata) \
+	if (_metadata->passed && _metadata->step < 255) \
+		_metadata->step++;
 
 #define __EXPECT(_expected, _seen, _t, _assert) do { \
 	/* Avoid multiple evaluation of the cases */ \
 	__typeof__(_expected) __exp = (_expected); \
 	__typeof__(_seen) __seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
 	if (!(__exp _t __seen)) { \
 		unsigned long long __exp_print = (uintptr_t)__exp; \
 		unsigned long long __seen_print = (uintptr_t)__seen; \
@@ -576,6 +593,7 @@
 #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
 	const char *__exp = (_expected); \
 	const char *__seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
 	if (!(strcmp(__exp, __seen) _t 0))  { \
 		__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
 		_metadata->passed = 0; \
@@ -590,6 +608,8 @@ struct __test_metadata {
 	int termsig;
 	int passed;
 	int trigger; /* extra handler after the evaluation */
+	__u8 step;
+	bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
 	struct __test_metadata *prev, *next;
 };
 
@@ -634,10 +654,13 @@ static inline void __register_test(struct __test_metadata *t)
 	}
 }
 
-static inline int __bail(int for_realz)
+static inline int __bail(int for_realz, bool no_print, __u8 step)
 {
-	if (for_realz)
+	if (for_realz) {
+		if (no_print)
+			_exit(step);
 		abort();
+	}
 	return 0;
 }
 
@@ -655,18 +678,24 @@ void __run_test(struct __test_metadata *t)
 		t->passed = 0;
 	} else if (child_pid == 0) {
 		t->fn(t);
-		_exit(t->passed);
+		/* return the step that failed or 0 */
+		_exit(t->passed ? 0 : t->step);
 	} else {
 		/* TODO(wad) add timeout support. */
 		waitpid(child_pid, &status, 0);
 		if (WIFEXITED(status)) {
-			t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0;
+			t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
 			if (t->termsig != -1) {
 				fprintf(TH_LOG_STREAM,
 					"%s: Test exited normally "
 					"instead of by signal (code: %d)\n",
 					t->name,
 					WEXITSTATUS(status));
+			} else if (!t->passed) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test failed at step #%d\n",
+					t->name,
+					WEXITSTATUS(status));
 			}
 		} else if (WIFSIGNALED(status)) {
 			t->passed = 0;
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 959273c..6936166 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -11,15 +11,26 @@
 
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
+.ONESHELL:
 define RUN_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+	@test_num=`echo 0`;
+	@echo "TAP version 13";
+	@for TEST in $(1); do				\
 		BASENAME_TEST=`basename $$TEST`;	\
-		cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests:  $$BASENAME_TEST [FAIL]"; cd -;\
+		test_num=`echo $$test_num+1 | bc`;	\
+		echo "selftests: $$BASENAME_TEST";	\
+		echo "========================================";	\
+		if [ ! -x $$BASENAME_TEST ]; then	\
+			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
+		else					\
+			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+		fi;					\
 	done;
 endef
 
 run_tests: all
-	$(RUN_TESTS)
+	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS))
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 67908b1..7f36172 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -33,7 +33,7 @@
 #include <unistd.h>
 
 #define MFD_DEF_SIZE 8192
-#define STACK_SIZE 65535
+#define STACK_SIZE 65536
 
 static int sys_memfd_create(const char *name,
 			    unsigned int flags)
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config
new file mode 100644
index 0000000..598d0a2
--- /dev/null
+++ b/tools/testing/selftests/nsfs/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_UTS_NS=y
+CONFIG_PID_NS=y
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
new file mode 100644
index 0000000..5a4a26e
--- /dev/null
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -0,0 +1,2 @@
+logs
+*uuid
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
index 83dd42b..d4064c7 100644
--- a/tools/testing/selftests/ptp/Makefile
+++ b/tools/testing/selftests/ptp/Makefile
@@ -1,3 +1,4 @@
+CFLAGS += -I../../../../usr/include/
 TEST_PROGS := testptp
 LDLIBS += -lrt
 all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 73f5ea6..4d6f92a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -107,7 +107,7 @@ TEST(mode_strict_support)
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
 	}
-	syscall(__NR_exit, 1);
+	syscall(__NR_exit, 0);
 }
 
 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index ccd0734..7d406c3 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -17,6 +17,8 @@
 #include <assert.h>
 #include <errno.h>
 
+#include "../kselftest.h"
+
 #ifndef SS_AUTODISARM
 #define SS_AUTODISARM  (1U << 31)
 #endif
@@ -41,8 +43,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 
 	if (sp < (unsigned long)sstack ||
 			sp >= (unsigned long)sstack + SIGSTKSZ) {
-		printf("[FAIL]\tSP is not on sigaltstack\n");
-		exit(EXIT_FAILURE);
+		ksft_exit_fail_msg("SP is not on sigaltstack\n");
 	}
 	/* put some data on stack. other sighandler will try to overwrite it */
 	aa = alloca(1024);
@@ -50,21 +51,22 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	p = (struct stk_data *)(aa + 512);
 	strcpy(p->msg, msg);
 	p->flag = 1;
-	printf("[RUN]\tsignal USR1\n");
+	ksft_print_msg("[RUN]\tsignal USR1\n");
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_DISABLE)
-		printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n",
+		ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n",
 				stk.ss_flags);
 	else
-		printf("[OK]\tsigaltstack is disabled in sighandler\n");
+		ksft_test_result_pass(
+				"sigaltstack is disabled in sighandler\n");
 	swapcontext(&sc, &uc);
-	printf("%s\n", p->msg);
+	ksft_print_msg("%s\n", p->msg);
 	if (!p->flag) {
-		printf("[RUN]\tAborting\n");
+		ksft_exit_skip("[RUN]\tAborting\n");
 		exit(EXIT_FAILURE);
 	}
 }
@@ -74,13 +76,13 @@ void my_usr2(int sig, siginfo_t *si, void *u)
 	char *aa;
 	struct stk_data *p;
 
-	printf("[RUN]\tsignal USR2\n");
+	ksft_print_msg("[RUN]\tsignal USR2\n");
 	aa = alloca(1024);
 	/* dont run valgrind on this */
 	/* try to find the data stored by previous sighandler */
 	p = memmem(aa, 1024, msg, strlen(msg));
 	if (p) {
-		printf("[FAIL]\tsigaltstack re-used\n");
+		ksft_test_result_fail("sigaltstack re-used\n");
 		/* corrupt the data */
 		strcpy(p->msg, msg2);
 		/* tell other sighandler that his data is corrupted */
@@ -90,7 +92,7 @@ void my_usr2(int sig, siginfo_t *si, void *u)
 
 static void switch_fn(void)
 {
-	printf("[RUN]\tswitched to user ctx\n");
+	ksft_print_msg("[RUN]\tswitched to user ctx\n");
 	raise(SIGUSR2);
 	setcontext(&sc);
 }
@@ -101,6 +103,8 @@ int main(void)
 	stack_t stk;
 	int err;
 
+	ksft_print_header();
+
 	sigemptyset(&act.sa_mask);
 	act.sa_flags = SA_ONSTACK | SA_SIGINFO;
 	act.sa_sigaction = my_usr1;
@@ -110,19 +114,20 @@ int main(void)
 	sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
 		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 	if (sstack == MAP_FAILED) {
-		perror("mmap()");
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
 		return EXIT_FAILURE;
 	}
 
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags == SS_DISABLE) {
-		printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
+		ksft_test_result_pass(
+				"Initial sigaltstack state was SS_DISABLE\n");
 	} else {
-		printf("[FAIL]\tInitial sigaltstack state was %x; "
+		ksft_exit_fail_msg("Initial sigaltstack state was %x; "
 		       "should have been SS_DISABLE\n", stk.ss_flags);
 		return EXIT_FAILURE;
 	}
@@ -133,7 +138,8 @@ int main(void)
 	err = sigaltstack(&stk, NULL);
 	if (err) {
 		if (errno == EINVAL) {
-			printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+			ksft_exit_skip(
+				"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
 			/*
 			 * If test cases for the !SS_AUTODISARM variant were
 			 * added, we could still run them.  We don't have any
@@ -142,7 +148,9 @@ int main(void)
 			 */
 			return 0;
 		} else {
-			perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)");
+			ksft_exit_fail_msg(
+				"sigaltstack(SS_ONSTACK | SS_AUTODISARM)  %s\n",
+					strerror(errno));
 			return EXIT_FAILURE;
 		}
 	}
@@ -150,7 +158,7 @@ int main(void)
 	ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
 		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 	if (ustack == MAP_FAILED) {
-		perror("mmap()");
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
 		return EXIT_FAILURE;
 	}
 	getcontext(&uc);
@@ -162,16 +170,17 @@ int main(void)
 
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_AUTODISARM) {
-		printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n",
+		ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n",
 				stk.ss_flags);
 		exit(EXIT_FAILURE);
 	}
-	printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n");
+	ksft_test_result_pass(
+			"sigaltstack is still SS_AUTODISARM after signal\n");
 
-	printf("[OK]\tTest passed\n");
+	ksft_exit_pass();
 	return 0;
 }
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
new file mode 100644
index 0000000..1e23fef
--- /dev/null
+++ b/tools/testing/selftests/splice/.gitignore
@@ -0,0 +1 @@
+default_file_splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index 9fc78e5..7e1187e 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,7 +1,4 @@
 TEST_PROGS := default_file_splice_read.sh
-EXTRA := default_file_splice_read
-all: $(TEST_PROGS) $(EXTRA)
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read
 
 include ../lib.mk
-
-EXTRA_CLEAN := $(EXTRA)
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 62fa666..7f79382 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -31,62 +31,83 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
 
+#include "../kselftest.h"
 #include "synctest.h"
 
 static int run_test(int (*test)(void), char *name)
 {
 	int result;
 	pid_t childpid;
+	int ret;
 
 	fflush(stdout);
 	childpid = fork();
 
 	if (childpid) {
 		waitpid(childpid, &result, 0);
-		if (WIFEXITED(result))
-			return WEXITSTATUS(result);
+		if (WIFEXITED(result)) {
+			ret = WEXITSTATUS(result);
+			if (!ret)
+				ksft_test_result_pass("[RUN]\t%s\n", name);
+			else
+				ksft_test_result_fail("[RUN]\t%s\n", name);
+			return ret;
+		}
 		return 1;
 	}
 
-	printf("[RUN]\tExecuting %s\n", name);
 	exit(test());
 }
 
-static int sync_api_supported(void)
+static void sync_api_supported(void)
 {
 	struct stat sbuf;
+	int ret;
 
-	return 0 == stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+	ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+	if (!ret)
+		return;
+
+	if (errno == ENOENT)
+		ksft_exit_skip("Sync framework not supported by kernel\n");
+
+	if (errno == EACCES)
+		ksft_exit_skip("Run Sync test as root.\n");
+
+	ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s",
+				strerror(errno));
 }
 
 int main(void)
 {
-	int err = 0;
+	int err;
 
-	if (!sync_api_supported()) {
-		printf("SKIP: Sync framework not supported by kernel\n");
-		return 0;
-	}
+	ksft_print_header();
 
-	printf("[RUN]\tTesting sync framework\n");
+	sync_api_supported();
 
-	err += RUN_TEST(test_alloc_timeline);
-	err += RUN_TEST(test_alloc_fence);
-	err += RUN_TEST(test_alloc_fence_negative);
+	ksft_print_msg("[RUN]\tTesting sync framework\n");
 
-	err += RUN_TEST(test_fence_one_timeline_wait);
-	err += RUN_TEST(test_fence_one_timeline_merge);
-	err += RUN_TEST(test_fence_merge_same_fence);
-	err += RUN_TEST(test_fence_multi_timeline_wait);
-	err += RUN_TEST(test_stress_two_threads_shared_timeline);
-	err += RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
-	err += RUN_TEST(test_merge_stress_random_merge);
+	RUN_TEST(test_alloc_timeline);
+	RUN_TEST(test_alloc_fence);
+	RUN_TEST(test_alloc_fence_negative);
 
+	RUN_TEST(test_fence_one_timeline_wait);
+	RUN_TEST(test_fence_one_timeline_merge);
+	RUN_TEST(test_fence_merge_same_fence);
+	RUN_TEST(test_fence_multi_timeline_wait);
+	RUN_TEST(test_stress_two_threads_shared_timeline);
+	RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
+	RUN_TEST(test_merge_stress_random_merge);
+
+	err = ksft_get_fail_cnt();
 	if (err)
-		printf("[FAIL]\tsync errors: %d\n", err);
-	else
-		printf("[OK]\tsync\n");
+		ksft_exit_fail_msg("%d out of %d sync tests failed\n",
+					err, ksft_test_num());
 
-	return !!err;
+	/* need this return to keep gcc happy */
+	return ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h
index e7d1d57..90a8e53 100644
--- a/tools/testing/selftests/sync/synctest.h
+++ b/tools/testing/selftests/sync/synctest.h
@@ -29,10 +29,11 @@
 #define SELFTESTS_SYNCTEST_H
 
 #include <stdio.h>
+#include "../kselftest.h"
 
 #define ASSERT(cond, msg) do { \
 	if (!(cond)) { \
-		printf("[ERROR]\t%s", (msg)); \
+		ksft_print_msg("[ERROR]\t%s", (msg)); \
 		return 1; \
 	} \
 } while (0)
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index a9b8613..ae45931 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -1,5 +1,4 @@
-BUILD_FLAGS = -DKTEST
-CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
 LDFLAGS += -lrt -lpthread -lm
 
 # these are all "safe" tests that don't modify
@@ -7,9 +6,11 @@
 TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
 	     inconsistency-check raw_skew threadtest rtctest
 
-TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch freq-step leap-a-day \
-		      leapcrash set-tai set-2038 set-tz rtctest_setdate
+		      leapcrash set-tai set-2038 set-tz
+
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate
 
 
 include ../lib.mk
@@ -18,16 +19,4 @@
 # and may modify the system time or trigger
 # other behavior like suspend
 run_destructive_tests: run_tests
-	./alarmtimer-suspend
-	./valid-adjtimex
-	./adjtick
-	./change_skew
-	./skew_consistency
-	./clocksource-switch
-	./freq-step
-	./leap-a-day -s -i 10
-	./leapcrash
-	./set-tz
-	./set-tai
-	./set-2038
-
+	$(call RUN_TESTS, $(DESTRUCTIVE_TESTS))
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 9887fd5..0caca3a 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -23,18 +23,7 @@
 #include <sys/timex.h>
 #include <time.h>
 
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_MONOTONIC_RAW	4
 
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 2b361b8..4da09db 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -28,18 +28,7 @@
 #include <signal.h>
 #include <stdlib.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_REALTIME			0
 #define CLOCK_MONOTONIC			1
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index cb19689..c4eab71 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -28,18 +28,7 @@
 #include <sys/time.h>
 #include <sys/timex.h>
 #include <time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index 5ff1653..bfc974b 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -34,18 +34,7 @@
 #include <fcntl.h>
 #include <string.h>
 #include <sys/wait.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 
 int get_clocksources(char list[][30])
@@ -61,7 +50,7 @@ int get_clocksources(char list[][30])
 
 	close(fd);
 
-	for (i = 0; i < 30; i++)
+	for (i = 0; i < 10; i++)
 		list[i][0] = '\0';
 
 	head = buf;
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index 74c60e8..022d3ff 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -28,18 +28,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CALLS_PER_LOOP 64
 #define NSEC_PER_SEC 1000000000ULL
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index fb46ad6..19e46ed 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -48,18 +48,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 #define CLOCK_TAI 11
@@ -190,18 +179,18 @@ int main(int argc, char **argv)
 	struct sigevent se;
 	struct sigaction act;
 	int signum = SIGRTMAX;
-	int settime = 0;
+	int settime = 1;
 	int tai_time = 0;
 	int insert = 1;
-	int iterations = -1;
+	int iterations = 10;
 	int opt;
 
 	/* Process arguments */
 	while ((opt = getopt(argc, argv, "sti:")) != -1) {
 		switch (opt) {
-		case 's':
-			printf("Setting time to speed up testing\n");
-			settime = 1;
+		case 'w':
+			printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n");
+			settime = 0;
 			break;
 		case 'i':
 			iterations = atoi(optarg);
@@ -210,9 +199,10 @@ int main(int argc, char **argv)
 			tai_time = 1;
 			break;
 		default:
-			printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]);
-			printf("	-s: Set time to right before leap second each iteration\n");
-			printf("	-i: Number of iterations\n");
+			printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]);
+			printf("	-w: Set flag and wait for leap second each iteration");
+			printf("	    (default sets time to right before leapsecond)\n");
+			printf("	-i: Number of iterations (-1 = infinite, default is 10)\n");
 			printf("	-t: Print TAI time\n");
 			exit(-1);
 		}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index a1071bd..830c462 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -22,20 +22,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
-
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
index a2a3924..1867db5 100644
--- a/tools/testing/selftests/timers/mqueue-lat.c
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -29,18 +29,7 @@
 #include <signal.h>
 #include <errno.h>
 #include <mqueue.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index ff942ff..8adb0bb 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -27,18 +27,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index 2d7898f..c3c3dc1 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -24,18 +24,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 30906bf..ca6cd14 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -25,19 +25,7 @@
 #include <sys/time.h>
 #include <sys/timex.h>
 #include <time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
 
 #define CLOCK_MONOTONIC_RAW		4
 #define NSEC_PER_SEC 1000000000LL
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
index f61170f..411eff6 100644
--- a/tools/testing/selftests/timers/rtctest.c
+++ b/tools/testing/selftests/timers/rtctest.c
@@ -221,6 +221,11 @@ int main(int argc, char **argv)
 	/* Read the current alarm settings */
 	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
 	if (retval == -1) {
+		if (errno == EINVAL) {
+			fprintf(stderr,
+					"\n...EINVAL reading current alarm setting.\n");
+			goto test_PIE;
+		}
 		perror("RTC_ALM_READ ioctl");
 		exit(errno);
 	}
@@ -231,7 +236,7 @@ int main(int argc, char **argv)
 	/* Enable alarm interrupts */
 	retval = ioctl(fd, RTC_AIE_ON, 0);
 	if (retval == -1) {
-		if (errno == EINVAL) {
+		if (errno == EINVAL || errno == EIO) {
 			fprintf(stderr,
 				"\n...Alarm IRQs not supported.\n");
 			goto test_PIE;
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
index c8a7e14..688cfd8 100644
--- a/tools/testing/selftests/timers/set-2038.c
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -27,18 +27,7 @@
 #include <unistd.h>
 #include <time.h>
 #include <sys/time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
index dc88dbc..70fed27 100644
--- a/tools/testing/selftests/timers/set-tai.c
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -23,18 +23,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 int set_tai(int offset)
 {
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 15434da..9c92b7b 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -28,18 +28,7 @@
 #include <signal.h>
 #include <stdlib.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_REALTIME			0
 #define CLOCK_MONOTONIC			1
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
index f418492..877fd55 100644
--- a/tools/testing/selftests/timers/set-tz.c
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -23,18 +23,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 int set_tz(int min, int dst)
 {
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 2a996e0..022b711 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -35,18 +35,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/wait.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index e632e11..759c9c0 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -21,19 +21,7 @@
 #include <stdlib.h>
 #include <sys/time.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
 
 /* serializes shared list access */
 pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 60fe3c5..d9d3ab9 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -32,18 +32,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 #define USEC_PER_SEC 1000000LL
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index a74c9d7..a1391be 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -9,12 +9,25 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <signal.h>
+#include <getopt.h>
 #include <sys/ioctl.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 
+#define DEFAULT_PING_RATE	1
+
 int fd;
 const char v = 'V';
+static const char sopts[] = "bdehp:t:";
+static const struct option lopts[] = {
+	{"bootstatus",          no_argument, NULL, 'b'},
+	{"disable",             no_argument, NULL, 'd'},
+	{"enable",              no_argument, NULL, 'e'},
+	{"help",                no_argument, NULL, 'h'},
+	{"pingrate",      required_argument, NULL, 'p'},
+	{"timeout",       required_argument, NULL, 't'},
+	{NULL,                  no_argument, NULL, 0x0}
+};
 
 /*
  * This function simply sends an IOCTL to the driver, which in turn ticks
@@ -23,12 +36,12 @@ const char v = 'V';
  */
 static void keep_alive(void)
 {
-    int dummy;
-    int ret;
+	int dummy;
+	int ret;
 
-    ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
-    if (!ret)
-        printf(".");
+	ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+	if (!ret)
+		printf(".");
 }
 
 /*
@@ -38,75 +51,110 @@ static void keep_alive(void)
 
 static void term(int sig)
 {
-    int ret = write(fd, &v, 1);
+	int ret = write(fd, &v, 1);
 
-    close(fd);
-    if (ret < 0)
-	printf("\nStopping watchdog ticks failed (%d)...\n", errno);
-    else
-	printf("\nStopping watchdog ticks...\n");
-    exit(0);
+	close(fd);
+	if (ret < 0)
+		printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+	else
+		printf("\nStopping watchdog ticks...\n");
+	exit(0);
+}
+
+static void usage(char *progname)
+{
+	printf("Usage: %s [options]\n", progname);
+	printf(" -b, --bootstatus    Get last boot status (Watchdog/POR)\n");
+	printf(" -d, --disable       Turn off the watchdog timer\n");
+	printf(" -e, --enable        Turn on the watchdog timer\n");
+	printf(" -h, --help          Print the help message\n");
+	printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
+	printf(" -t, --timeout=T     Set timeout to T seconds\n");
+	printf("\n");
+	printf("Parameters are parsed left-to-right in real-time.\n");
+	printf("Example: %s -d -t 10 -p 5 -e\n", progname);
 }
 
 int main(int argc, char *argv[])
 {
-    int flags;
-    unsigned int ping_rate = 1;
-    int ret;
-    int i;
+	int flags;
+	unsigned int ping_rate = DEFAULT_PING_RATE;
+	int ret;
+	int c;
+	int oneshot = 0;
 
-    setbuf(stdout, NULL);
+	setbuf(stdout, NULL);
 
-    fd = open("/dev/watchdog", O_WRONLY);
+	fd = open("/dev/watchdog", O_WRONLY);
 
-    if (fd == -1) {
-	printf("Watchdog device not enabled.\n");
-	exit(-1);
-    }
+	if (fd == -1) {
+		printf("Watchdog device not enabled.\n");
+		exit(-1);
+	}
 
-    for (i = 1; i < argc; i++) {
-        if (!strncasecmp(argv[i], "-d", 2)) {
-            flags = WDIOS_DISABLECARD;
-            ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
-            if (!ret)
-                printf("Watchdog card disabled.\n");
-        } else if (!strncasecmp(argv[i], "-e", 2)) {
-            flags = WDIOS_ENABLECARD;
-            ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
-            if (!ret)
-                printf("Watchdog card enabled.\n");
-        } else if (!strncasecmp(argv[i], "-t", 2) && argv[2]) {
-            flags = atoi(argv[i + 1]);
-            ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
-            if (!ret)
-                printf("Watchdog timeout set to %u seconds.\n", flags);
-            i++;
-        } else if (!strncasecmp(argv[i], "-p", 2) && argv[2]) {
-            ping_rate = strtoul(argv[i + 1], NULL, 0);
-            printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
-            i++;
-        } else {
-            printf("-d to disable, -e to enable, -t <n> to set "
-                "the timeout,\n-p <n> to set the ping rate, and ");
-            printf("run by itself to tick the card.\n");
-            printf("Parameters are parsed left-to-right in real-time.\n");
-            printf("Example: %s -d -t 10 -p 5 -e\n", argv[0]);
-            goto end;
-        }
-    }
+	while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+		switch (c) {
+		case 'b':
+			flags = 0;
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+			if (!ret)
+				printf("Last boot is caused by: %s.\n", (flags != 0) ?
+					"Watchdog" : "Power-On-Reset");
+			else
+				printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno));
+			break;
+		case 'd':
+			flags = WDIOS_DISABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card disabled.\n");
+			else
+				printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno));
+			break;
+		case 'e':
+			flags = WDIOS_ENABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card enabled.\n");
+			else
+				printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno));
+			break;
+		case 'p':
+			ping_rate = strtoul(optarg, NULL, 0);
+			if (!ping_rate)
+				ping_rate = DEFAULT_PING_RATE;
+			printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+			break;
+		case 't':
+			flags = strtoul(optarg, NULL, 0);
+			ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+			if (!ret)
+				printf("Watchdog timeout set to %u seconds.\n", flags);
+			else
+				printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno));
+			break;
+		default:
+			usage(argv[0]);
+			goto end;
+		}
+	}
 
-    printf("Watchdog Ticking Away!\n");
+	if (oneshot)
+		goto end;
 
-    signal(SIGINT, term);
+	printf("Watchdog Ticking Away!\n");
 
-    while(1) {
-	keep_alive();
-	sleep(ping_rate);
-    }
+	signal(SIGINT, term);
+
+	while (1) {
+		keep_alive();
+		sleep(ping_rate);
+	}
 end:
-    ret = write(fd, &v, 1);
-    if (ret < 0)
-	printf("Stopping watchdog ticks failed (%d)...\n", errno);
-    close(fd);
-    return 0;
+	ret = write(fd, &v, 1);
+	if (ret < 0)
+		printf("Stopping watchdog ticks failed (%d)...\n", errno);
+	close(fd);
+	return 0;
 }
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index a39a1e1..b9f68e4 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 		&& !v->arch.power_off && !v->arch.pause);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return vcpu_mode_priv(vcpu);
+}
+
 /* Just ensure a guest exit from a particular CPU */
 static void exit_vm_noop(void *info)
 {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 2ea21da..b36945d 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1454,25 +1454,6 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 		kvm_set_pfn_accessed(pfn);
 }
 
-static bool is_abort_sea(unsigned long fault_status)
-{
-	switch (fault_status) {
-	case FSC_SEA:
-	case FSC_SEA_TTW0:
-	case FSC_SEA_TTW1:
-	case FSC_SEA_TTW2:
-	case FSC_SEA_TTW3:
-	case FSC_SECC:
-	case FSC_SECC_TTW0:
-	case FSC_SECC_TTW1:
-	case FSC_SECC_TTW2:
-	case FSC_SECC_TTW3:
-		return true;
-	default:
-		return false;
-	}
-}
-
 /**
  * kvm_handle_guest_abort - handles all 2nd stage aborts
  * @vcpu:	the VCPU pointer
@@ -1498,20 +1479,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
-	/*
-	 * The host kernel will handle the synchronous external abort. There
-	 * is no need to pass the error into the guest.
-	 */
-	if (is_abort_sea(fault_status)) {
+	/* Synchronous External Abort? */
+	if (kvm_vcpu_dabt_isextabt(vcpu)) {
+		/*
+		 * For RAS the host kernel may handle this abort.
+		 * There is no need to pass the error into the guest.
+		 */
 		if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
 			return 1;
-	}
 
-	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
-	if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
-		kvm_inject_vabt(vcpu);
-		return 1;
+		if (unlikely(!is_iabt)) {
+			kvm_inject_vabt(vcpu);
+			return 1;
+		}
 	}
 
 	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index 7072ab7..10b3817 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -234,7 +234,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations vgic_debug_seq_ops = {
+static const struct seq_operations vgic_debug_seq_ops = {
 	.start = vgic_debug_start,
 	.next  = vgic_debug_next,
 	.stop  = vgic_debug_stop,
@@ -255,7 +255,7 @@ static int debug_open(struct inode *inode, struct file *file)
 	return ret;
 };
 
-static struct file_operations vgic_debug_fops = {
+static const struct file_operations vgic_debug_fops = {
 	.owner   = THIS_MODULE,
 	.open    = debug_open,
 	.read    = seq_read,
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index aa6b68d..f51c1e1 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -144,7 +144,6 @@ struct its_ite {
 
 	struct vgic_irq *irq;
 	struct its_collection *collection;
-	u32 lpi;
 	u32 event_id;
 };
 
@@ -813,7 +812,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
 /* Must be called with its_lock mutex held */
 static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
 					  struct its_collection *collection,
-					  u32 lpi_id, u32 event_id)
+					  u32 event_id)
 {
 	struct its_ite *ite;
 
@@ -823,7 +822,6 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
 
 	ite->event_id	= event_id;
 	ite->collection = collection;
-	ite->lpi = lpi_id;
 
 	list_add_tail(&ite->ite_list, &device->itt_head);
 	return ite;
@@ -873,7 +871,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
 		new_coll = collection;
 	}
 
-	ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id);
+	ite = vgic_its_alloc_ite(device, collection, event_id);
 	if (IS_ERR(ite)) {
 		if (new_coll)
 			vgic_its_free_collection(its, coll_id);
@@ -1848,7 +1846,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
 
 	next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
 	val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
-	       ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) |
+	       ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
 		ite->collection->collection_id;
 	val = cpu_to_le64(val);
 	return kvm_write_guest(kvm, gpa, &val, ite_esz);
@@ -1895,7 +1893,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
 	if (!collection)
 		return -EINVAL;
 
-	ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id);
+	ite = vgic_its_alloc_ite(dev, collection, event_id);
 	if (IS_ERR(ite))
 		return PTR_ERR(ite);
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 37522e6..b3d4a10 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -303,6 +303,51 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 	vgic_set_vmcr(vcpu, &vmcr);
 }
 
+static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
+					gpa_t addr, unsigned int len)
+{
+	int n; /* which APRn is this */
+
+	n = (addr >> 2) & 0x3;
+
+	if (kvm_vgic_global_state.type == VGIC_V2) {
+		/* GICv2 hardware systems support max. 32 groups */
+		if (n != 0)
+			return 0;
+		return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr;
+	} else {
+		struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+		if (n > vgic_v3_max_apr_idx(vcpu))
+			return 0;
+		/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
+		return vgicv3->vgic_ap1r[n];
+	}
+}
+
+static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
+				gpa_t addr, unsigned int len,
+				unsigned long val)
+{
+	int n; /* which APRn is this */
+
+	n = (addr >> 2) & 0x3;
+
+	if (kvm_vgic_global_state.type == VGIC_V2) {
+		/* GICv2 hardware systems support max. 32 groups */
+		if (n != 0)
+			return;
+		vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val;
+	} else {
+		struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+		if (n > vgic_v3_max_apr_idx(vcpu))
+			return;
+		/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
+		vgicv3->vgic_ap1r[n] = val;
+	}
+}
+
 static const struct vgic_register_region vgic_v2_dist_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
 		vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
@@ -364,7 +409,7 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = {
 		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,
-		vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
+		vgic_mmio_read_apr, vgic_mmio_write_apr, 16,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
 		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index bba7fa2..bf9ceab 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -220,4 +220,20 @@ int vgic_debug_destroy(struct kvm *kvm);
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
 
+static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
+
+	/*
+	 * num_pri_bits are initialized with HW supported values.
+	 * We can rely safely on num_pri_bits even if VM has not
+	 * restored ICC_CTLR_EL1 before restoring APnR registers.
+	 */
+	switch (cpu_if->num_pri_bits) {
+	case 7: return 3;
+	case 6: return 1;
+	default: return 0;
+	}
+}
+
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d81f6d..6ed1c20 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1609,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
 			    struct page **pages, int nr_pages)
 {
 	unsigned long addr;
-	gfn_t entry;
+	gfn_t entry = 0;
 
 	addr = gfn_to_hva_many(slot, gfn, &entry);
 	if (kvm_is_error_hva(addr))
@@ -1928,6 +1928,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
 		 * verify that the entire region is valid here.
 		 */
 		while (start_gfn <= end_gfn) {
+			nr_pages_avail = 0;
 			ghc->memslot = __gfn_to_memslot(slots, start_gfn);
 			ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
 						   &nr_pages_avail);
@@ -2275,7 +2276,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 #endif
 }
 
-void kvm_vcpu_on_spin(struct kvm_vcpu *me)
+void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
@@ -2306,6 +2307,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
+			if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
+				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;